Exemplo n.º 1
0
/**
 * Converts a dense array into a sparse array in CSR storage.
 *
 * The conversion is not done inline: a task invoking the dnscsr routine is
 * pushed onto the backend queue and fills the returned sparse array.
 *
 * `stype` is declared outside this excerpt (presumably a template
 * parameter -- TODO confirm). Any value other than AF_STORAGE_CSR is
 * reported through AF_ERROR before anything is allocated or enqueued.
 *
 * @param in_ dense input array
 * @return sparse array (CSR storage) populated by the enqueued task
 */
SparseArray<T> sparseConvertDenseToStorage(const Array<T> &in_)
{
    // Fail fast: there is no point counting non-zeros, allocating and
    // enqueueing a conversion whose result would be discarded by the error.
    if (stype != AF_STORAGE_CSR) {
        AF_ERROR("CPU Backend only supports Dense to CSR or COO", AF_ERR_NOT_SUPPORTED);
    }

    in_.eval();

    // MKL only has dns->csr.
    // CSR <-> CSC is only supported if input is square
    uint nNZ = reduce_all<af_notzero_t, T, uint>(in_);

    SparseArray<T> sparse_ = createEmptySparseArray<T>(in_.dims(), nNZ, AF_STORAGE_CSR);
    sparse_.eval();

    auto func = [=] (SparseArray<T> sparse, const Array<T> in) {
        // Read: https://software.intel.com/en-us/node/520848
        // But job description is incorrect with regards to job[1]
        // 0 implies row major and 1 implies column major
        int j1 = 1, j2 = 0;
        const int job[] = {0, j1, j2, 2, (int)sparse.elements(), 1};

        const int M = in.dims()[0];
        const int N = in.dims()[1];

        // Leading dimension of the dense input: stride of dimension 1.
        int ldd = in.strides()[1];

        int info = 0;

        // Have to mess up all const correctness because MKL dnscsr function
        // is bidirectional and has input/output on all pointers
        Array<T  > &values = sparse.getValues();
        Array<int> &rowIdx = sparse.getRowIdx();
        Array<int> &colIdx = sparse.getColIdx();

        dnscsr_func<T>()(
                job, &M, &N,
                reinterpret_cast<ptr_type<T>>(const_cast<T*>(in.get())), &ldd,
                reinterpret_cast<ptr_type<T>>(values.get()),
                colIdx.get(),
                rowIdx.get(),
                &info);
    };

    getQueue().enqueue(func, sparse_, in_);

    return sparse_;
}
Exemplo n.º 2
0
// Expands a COO sparse array into a dense array.
// The dense result is created filled with zero and the coo2dense kernel is
// enqueued on the backend queue to write the stored entries into it.
Array<T> sparseConvertCOOToDense(const SparseArray<T> &in)
{
    in.eval();

    // Zero-filled output with the same dimensions as the sparse input.
    Array<T> result = createValueArray<T>(in.dims(), scalar<T>(0));
    result.eval();

    // Copies of the three COO component arrays, handed to the kernel by value.
    const Array<T>   vals = in.getValues();
    const Array<int> rows = in.getRowIdx();
    const Array<int> cols = in.getColIdx();

    getQueue().enqueue(kernel::coo2dense<T>, result, vals, rows, cols);

    return result;
}
Exemplo n.º 3
0
    // Launches the scan-dim kernel obtained with index 1 from
    // get_scan_dim_kernels (used here as the broadcast pass).
    //
    // out        : buffer and info passed as the kernel's first pair of arguments
    // tmp        : buffer and info passed as the kernel's second pair of arguments
    // groups_all : work-group counts; entries 0..3 size the global range
    static void bcast_dim_launcher(Param &out,
                                   Param &tmp,
                                   const uint groups_all[4])
    {
        Kernel bcastKernel = get_scan_dim_kernels<Ti, To, op, dim, isFinalPass, threads_y>(1);

        NDRange local(THREADS_X, threads_y);
        NDRange global(groups_all[0] * groups_all[2] * local[0],
                       groups_all[1] * groups_all[3] * local[1]);

        // Value passed as the kernel's last argument: extent of `dim`
        // divided (rounding up) by threads_y * groups along `dim`.
        uint limit = divup(out.info.dims[dim], (threads_y * groups_all[dim]));

        auto launch = make_kernel<Buffer, KParam,
                                  Buffer, KParam,
                                  uint, uint,
                                  uint, uint>(bcastKernel);

        launch(EnqueueArgs(getQueue(), global, local),
               out.data, out.info,
               tmp.data, tmp.info,
               groups_all[0], groups_all[1], groups_all[dim], limit);

        CL_DEBUG_FINISH(getQueue());
    }
Exemplo n.º 4
0
// Builds an Array with explicit dimensions, strides and offset.
//
// When is_device is true the given pointer is adopted as the array's storage
// (released later through memFree<T>). Otherwise fresh storage for
// info.total() elements is allocated and the input is copied into it.
Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
                const T * const in_data, bool is_device)
    : info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type)
    , data(is_device ? const_cast<T*>(in_data) : memAlloc<T>(info.total()).release(), memFree<T>)
    , data_dims(dims)
    , node(bufferNodePtr<T>())
    , ready(true)
    , owner(true)
{
    if (is_device) {
        // Storage was adopted as-is; nothing to copy.
        return;
    }

    // Ensure the memory being written to isnt used anywhere else.
    getQueue().sync();
    copy(in_data, in_data + info.total(), data.get());
}
Exemplo n.º 5
0
// Rotates `in` by `theta` into a newly created array of dimensions `odims`.
// The rotate kernel matching `method` is enqueued on the backend queue;
// methods other than NEAREST, BILINEAR and LOWER are reported via AF_ERROR.
Array<T> rotate(const Array<T> &in, const float theta, const af::dim4 &odims,
                 const af_interp_type method)
{
    in.eval();

    Array<T> result = createEmptyArray<T>(odims);

    if (method == AF_INTERP_NEAREST) {
        getQueue().enqueue(kernel::rotate<T, AF_INTERP_NEAREST>, result, in, theta);
    } else if (method == AF_INTERP_BILINEAR) {
        getQueue().enqueue(kernel::rotate<T, AF_INTERP_BILINEAR>, result, in, theta);
    } else if (method == AF_INTERP_LOWER) {
        getQueue().enqueue(kernel::rotate<T, AF_INTERP_LOWER>, result, in, theta);
    } else {
        AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
    }

    return result;
}
Exemplo n.º 6
0
        // Sorts `val` in place along dimension 0.
        //
        // Every 1-D slice along dimension 0 (one per index of dimensions
        // 1..3) is sorted independently with compute::stable_sort, ascending
        // or descending depending on `isAscending` (declared outside this
        // excerpt). OpenCL errors are converted with CL_TO_AF_ERROR.
        void sort0(Param val)
        {
            try {
                compute::command_queue c_queue(getQueue()());

                compute::buffer val_buf((*val.data)());

                for(int w = 0; w < val.info.dims[3]; w++) {
                    int valW = w * val.info.strides[3];
                    for(int z = 0; z < val.info.dims[2]; z++) {
                        int valWZ = valW + z * val.info.strides[2];
                        for(int y = 0; y < val.info.dims[1]; y++) {

                            // Element offset of the first entry of this slice.
                            int valOffset = valWZ + y * val.info.strides[1];

                            // The slice bounds do not depend on the sort
                            // direction, so build them once.
                            auto first = compute::make_buffer_iterator<T>(val_buf, valOffset);
                            auto last  = compute::make_buffer_iterator<T>(val_buf, valOffset + val.info.dims[0]);

                            if(isAscending) {
                                compute::stable_sort(first, last, compute::less<T>(), c_queue);
                            } else {
                                compute::stable_sort(first, last, compute::greater<T>(), c_queue);
                            }
                        }
                    }
                }

                CL_DEBUG_FINISH(getQueue());
            } catch (const cl::Error &err) {
                // Caught by const reference: no copy of the exception object.
                CL_TO_AF_ERROR(err);
                throw;
            }
        }
void convNHelper(const conv_kparam_t& param, Param& out, const Param& signal, const Param& filter)
{
    std::string ref_name = std::string("convolveND_") +
        std::string(dtype_traits<T>::getName()) + std::string(dtype_traits<aT>::getName()) +
        std::to_string(bDim) + std::to_string(expand);

    int device = getActiveDeviceId();

    kc_entry_t entry = kernelCache(device, ref_name);

    if (entry.prog==0 && entry.ker==0) {
        std::ostringstream options;
        options << " -D T="         << dtype_traits<T>::getName()
                << " -D Ti="        << dtype_traits<T>::getName()
                << " -D To="        << dtype_traits<aT>::getName()
                << " -D accType="   << dtype_traits<aT>::getName()
                << " -D BASE_DIM="  << bDim
                << " -D EXPAND="    << expand
                << " -D "           << binOpName<af_mul_t>();

        if((af_dtype) dtype_traits<T>::af_type == c32 ||
            (af_dtype) dtype_traits<T>::af_type == c64) {
            options << " -D CPLX=1";
        } else {
            options << " -D CPLX=0";
        }
        if (std::is_same<T, double>::value || std::is_same<T, cdouble>::value)
            options << " -D USE_DOUBLE";

        const char *ker_strs[] = {ops_cl, convolve_cl};
        const int   ker_lens[] = {ops_cl_len, convolve_cl_len};
        Program prog;
        buildProgram(prog, 2, ker_strs, ker_lens, options.str());

        entry.prog   = new Program(prog);
        entry.ker = new Kernel(*entry.prog, "convolve");

        addKernelToCache(device, ref_name, entry);
    }

    auto convOp = cl::KernelFunctor<Buffer, KParam, Buffer, KParam, cl::LocalSpaceArg, Buffer, KParam,
                                    int, int, int, int, int, int, int, int >(*entry.ker);

    convOp(EnqueueArgs(getQueue(), param.global, param.local),
           *out.data, out.info, *signal.data, signal.info, cl::Local(param.loc_size),
           *param.impulse, filter.info, param.nBBS0, param.nBBS1,
           param.o[0], param.o[1], param.o[2], param.s[0], param.s[1], param.s[2]);
}
Exemplo n.º 8
0
/*
 * Debugging aid: walks the queue from its start node and prints every
 * board, one row of cells per line, preceded by its 1-based position.
 */
void printQueue()	{

	BoardNode node;
	int r, c, itemNo = 0;

	for(node = getQueue(NULL)->start; node != NULL; node = node->next)	{
		itemNo += 1;
		printf("Queue Item %d\n",itemNo);
		for(r = 0; r < MAXROW; r++)	{
			for(c = 0; c < MAXCOL; c++)	{
				printf("%d ",node->board[r][c]);
			}
			/* end of one board row */
			pNL();
		}
	}
}
Exemplo n.º 9
0
        // Fills `out` with `elements` random values by launching the
        // "random" OpenCL kernel on the active device.
        //
        // The program and kernel are built once per device (guarded by a
        // per-device std::once_flag) and reused on later calls. A
        // function-static counter is advanced on every call and passed to
        // the kernel together with random_seed[0] and random_seed[1].
        // OpenCL errors are converted with CL_TO_AF_ERROR.
        void random(cl::Buffer out, dim_type elements)
        {
            try {
                static unsigned counter;

                static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
                static Program            ranProgs[DeviceManager::MAX_DEVICES];
                static Kernel           ranKernels[DeviceManager::MAX_DEVICES];

                int device = getActiveDeviceId();

                std::call_once( compileFlags[device], [device] () {
                        // buildProgram receives the source directly, so no
                        // intermediate Program::Sources list is needed.
                        std::ostringstream options;
                        options << " -D T=" << dtype_traits<T>::getName()
                                << " -D repeat="<< REPEAT
                                << " -D " << random_name<T, isRandu>().name();

                        if (std::is_same<T, double>::value) {
                            options << " -D USE_DOUBLE";
                            options << " -D IS_64";
                        }

                        if (std::is_same<T, char>::value) {
                            options << " -D IS_BOOL";
                        }

                        buildProgram(ranProgs[device], random_cl, random_cl_len, options.str());
                        ranKernels[device] = Kernel(ranProgs[device], "random");
                    });

                auto randomOp = make_kernel<cl::Buffer, uint, uint, uint, uint>(ranKernels[device]);

                // One work-group of THREADS items covers THREADS * REPEAT elements.
                uint groups = divup(elements, THREADS * REPEAT);
                counter += divup(elements, THREADS * groups);

                NDRange local(THREADS, 1);
                NDRange global(THREADS * groups, 1);

                randomOp(EnqueueArgs(getQueue(), global, local),
                         out, elements, counter, random_seed[0], random_seed[1]);
            } catch(const cl::Error &ex) {
                // Caught by const reference: no copy of the exception object.
                CL_TO_AF_ERROR(ex);
            }
        }
/*
 * Checks the start of the action queue for a command and actions it if
 * checkQueue() accepts it for the calculated cost.
 *
 * Returns 0 when the queue is empty and 1 otherwise (also when the head
 * command was not actioned or is not one of the handled commands).
 */
int popToTower()	{
    ActionQueueStructure queue = getQueue(NULL);
    GameProperties Game = getGame(NULL);
    int needed;

    /* Nothing queued: nothing to do. */
    if(queue->start == NULL) {
        return 0;
    }

    needed = calculateCosts(queue->start->command,queue->start->option,queue->start->target);
    switch(queue->start->command)	{
    case cmd_upgrade:
        if (checkQueue(queue, Game,needed)) {
            upgradeTowerStat(queue->start->option,queue->start->target);
            useMemory(Game, needed);
            removeQueueItem();
        }
        break;
    case cmd_mktwr:
        if (checkQueue(queue,Game,needed)) {
            switch(queue->start->option)	{
            case mktwr_int:
                createTowerTypeFromPositions(queue->start->target,INT_TYPE);
                break;
            case mktwr_char:
                createTowerTypeFromPositions(queue->start->target,CHAR_TYPE);
                break;
            default:
                fprintf(stderr,"Unrecognised tower type\n");
                break;
            }
            //createTowerFromPositions(queue->start->target);
            /* The cost is charged and the item removed even for an
               unrecognised tower type. */
            useMemory(Game, needed);
            removeQueueItem();
        }
        break;
    case cmd_aptget:
        if(checkQueue(queue,Game,needed)) {
            unlock_ability(KILL);
            useMemory(Game, needed);
            removeQueueItem();
        }
        /* Explicit break: previously fell through into default. */
        break;
    default:
        break;
    }
    return 1;
}
Exemplo n.º 11
0
//=============================================================================
// METHOD: SPELLipcMessageMailbox::place
//=============================================================================
bool SPELLipcMessageMailbox::place( std::string id, const SPELLipcMessage& msg )
{
    DEBUG(NAME + "Place message on queue with id " + id + " (" + msg.getSequenceStr() + ")");
    SPELLipcMessageQueue* queue = getQueue(id);
    if(queue)
	{
        DEBUG(NAME + "Place message IN");
    	queue->push(msg);
        DEBUG(NAME + "Place message OUT");
    	return true;
	}
    else
    {
    	LOG_ERROR("###### No queue to place response " + msg.dataStr());
    	return false;
    }
}
Exemplo n.º 12
0
// In-place Cholesky factorisation of `in`.
// Delegates to cpu::cholesky_inplace when OpenCLCPUOffload() is true;
// otherwise calls magma_potrf_gpu on the array's buffer.
// Returns the `info` value written by magma_potrf_gpu (or the CPU result).
int cholesky_inplace(Array<T> &in, const bool is_upper)
{
    if(OpenCLCPUOffload()) {
        return cpu::cholesky_inplace(in, is_upper);
    }

    // Matrix order is taken from the first dimension.
    const int order = in.dims()[0];

    magma_uplo_t triangle = MagmaLower;
    if (is_upper) {
        triangle = MagmaUpper;
    }

    int status = 0;
    cl::Buffer *buffer = in.get();
    magma_potrf_gpu<T>(triangle, order,
                        (*buffer)(), in.getOffset(),  in.strides()[1],
                        getQueue()(), &status);
    return status;
}
Exemplo n.º 13
0
void laset(int m, int  n,
           T offdiag, T diag,
           cl_mem dA, size_t dA_offset, magma_int_t ldda)
{
    std::string refName = laset_name<uplo>() + std::string("_") +
        std::string(dtype_traits<T>::getName()) +
        std::to_string(uplo);

    int device = getActiveDeviceId();
    kc_entry_t entry = kernelCache(device, refName);

    if (entry.prog==0 && entry.ker==0) {
        std::ostringstream options;
        options << " -D T=" << dtype_traits<T>::getName()
                << " -D BLK_X=" << BLK_X
                << " -D BLK_Y=" << BLK_Y
                << " -D IS_CPLX=" << af::iscplx<T>();

        if (std::is_same<T, double>::value || std::is_same<T, cdouble>::value)
            options << " -D USE_DOUBLE";

        const char* ker_strs[] = {laset_cl};
        const int   ker_lens[] = {laset_cl_len};
        Program prog;
        buildProgram(prog, 1, ker_strs, ker_lens, options.str());
        entry.prog = new Program(prog);
        entry.ker  = new Kernel(*entry.prog, laset_name<uplo>());

        addKernelToCache(device, refName, entry);
    }

    int groups_x = (m - 1) / BLK_X + 1;
    int groups_y = (n - 1) / BLK_Y + 1;

    NDRange local(BLK_X, 1);
    NDRange global(groups_x * local[0], groups_y * local[1]);

    // retain the cl_mem object during cl::Buffer creation
    cl::Buffer dAObj(dA, true);

    auto lasetOp = KernelFunctor<int, int, T, T, Buffer, unsigned long long, int>(*entry.ker);

    lasetOp(EnqueueArgs(getQueue(), global, local), m, n, offdiag, diag, dAObj, dA_offset, ldda);
}
Exemplo n.º 14
0
// Solves the triangular system A * X = b and returns X.
//
// `b` is copied first and the BLAS trsm routine overwrites that copy.
// `options` selects upper/lower (AF_MAT_UPPER / AF_MAT_LOWER) and unit
// diagonal (AF_MAT_DIAG_UNIT). On platforms whose name contains "NVIDIA"
// an upper-triangular solve is performed on the conjugate transpose of A
// as a lower-triangular, conjugate-transposed solve instead.
//
// NOTE(review): the status returned by gpu_trsm is not inspected here.
Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
{
    trsm_func<T> gpu_trsm;

    Array<T> B = copyArray<T>(b);

    int N = B.dims()[0];
    int NRHS = B.dims()[1];

    const cl::Buffer* A_buf = A.get();
    cl::Buffer* B_buf = B.get();

    // RAII wrapper: the event handed back by the BLAS call is released when
    // `event` goes out of scope instead of being leaked as a raw cl_event.
    cl::Event event;
    cl_command_queue queue = getQueue()();

    std::string pName = getPlatformName(getDevice());
    if(pName.find("NVIDIA") != std::string::npos && (options & AF_MAT_UPPER))
    {
        Array<T> AT = transpose<T>(A, true);

        cl::Buffer* AT_buf = AT.get();
        gpu_trsm(clblasColumnMajor,
                 clblasLeft,
                 clblasLower,
                 clblasConjTrans,
                 options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
                 N, NRHS, scalar<T>(1),
                 (*AT_buf)(), AT.getOffset(), AT.strides()[1],
                 (*B_buf)(), B.getOffset(), B.strides()[1],
                 1, &queue, 0, nullptr, &event());
    } else {
        gpu_trsm(clblasColumnMajor,
                 clblasLeft,
                 options & AF_MAT_LOWER ? clblasLower : clblasUpper,
                 clblasNoTrans,
                 options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
                 N, NRHS, scalar<T>(1),
                 (*A_buf)(), A.getOffset(), A.strides()[1],
                 (*B_buf)(), B.getOffset(), B.strides()[1],
                 1, &queue, 0, nullptr, &event());
    }

    return B;
}
// Queues `msg` for its destination member and, when a connection to that
// member exists, also hands the message to the connection.
// Returns RC_ERROR for a negative destination member id, 0 otherwise.
int32_t InnerUdtServer::sendMessage(idgs::actor::ActorMessagePtr& msg) {
  const int32_t destination = msg->getDestMemberId();
  if (destination < 0) {
    LOG(ERROR) << "Invalid member ID: " << destination;
    return RC_ERROR;
  }

  auto pending = getQueue(destination);

  // freePbMemory() is called before the message is queued.
  msg->freePbMemory();
  pending->push(msg);

  if (std::shared_ptr<InnerUdtConnection> link = getConnection(destination)) {
    link->sendMessage(msg);
  }

  return 0;
}
Exemplo n.º 16
0
// Solves the triangular system A * X = b and returns X.
//
// `b` is copied first and the BLAS trsm routine overwrites that copy; its
// status is verified with CLBLAS_CHECK. `options` selects upper/lower
// (AF_MAT_UPPER / AF_MAT_LOWER) and unit diagonal (AF_MAT_DIAG_UNIT).
// On the NVIDIA platform an upper-triangular solve is performed on the
// conjugate transpose of A as a lower-triangular, conjugate-transposed
// solve instead.
Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
{
    gpu_blas_trsm_func<T> gpu_blas_trsm;

    Array<T> B = copyArray<T>(b);

    int N = B.dims()[0];
    int NRHS = B.dims()[1];

    const cl::Buffer* A_buf = A.get();
    cl::Buffer* B_buf = B.get();

    // RAII wrapper: the event handed back by the BLAS call is released when
    // `event` goes out of scope (also when CLBLAS_CHECK raises) instead of
    // being leaked as a raw cl_event.
    cl::Event event;
    cl_command_queue queue = getQueue()();

    if(getActivePlatform() == AFCL_PLATFORM_NVIDIA && (options & AF_MAT_UPPER))
    {
        Array<T> AT = transpose<T>(A, true);

        cl::Buffer* AT_buf = AT.get();
        CLBLAS_CHECK(gpu_blas_trsm(
                         clblasLeft,
                         clblasLower,
                         clblasConjTrans,
                         options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
                         N, NRHS, scalar<T>(1),
                         (*AT_buf)(), AT.getOffset(), AT.strides()[1],
                         (*B_buf)(), B.getOffset(), B.strides()[1],
                         1, &queue, 0, nullptr, &event()));
    } else {
        CLBLAS_CHECK(gpu_blas_trsm(
                         clblasLeft,
                         options & AF_MAT_LOWER ? clblasLower : clblasUpper,
                         clblasNoTrans,
                         options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
                         N, NRHS, scalar<T>(1),
                         (*A_buf)(), A.getOffset(), A.strides()[1],
                         (*B_buf)(), B.getOffset(), B.strides()[1],
                         1, &queue, 0, nullptr, &event()));
    }

    return B;
}
Exemplo n.º 17
0
// Builds (once per device) and launches the "convolve" kernel with the
// launch configuration and offsets stored in `param`.
//
// Compilation is guarded by a per-device std::once_flag; the resulting
// program and kernel are kept in function-static maps keyed by device id.
// NOTE(review): first calls for two different devices could insert into
// those maps at the same time -- confirm callers serialise this.
// OpenCL errors are converted with CL_TO_AF_ERROR.
void convNHelper(const conv_kparam_t& param, Param& out, const Param& signal, const Param& filter)
{
    try {
        static std::once_flag  compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*> convProgs;
        static std::map<int, Kernel*>  convKernels;

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {
                    std::ostringstream options;
                    options << " -D T=" << dtype_traits<T>::getName()
                            << " -D accType="<< dtype_traits<aT>::getName()
                            << " -D BASE_DIM="<< bDim
                            << " -D EXPAND=" << expand;
                    if (std::is_same<T, double>::value ||
                        std::is_same<T, cdouble>::value) {
                        options << " -D USE_DOUBLE";
                    }
                    Program prog;
                    buildProgram(prog, convolve_cl, convolve_cl_len, options.str());
                    convProgs[device]   = new Program(prog);
                    convKernels[device] = new Kernel(*convProgs[device], "convolve");
                });

        auto convOp = cl::KernelFunctor<Buffer, KParam, Buffer, KParam,
                                        cl::LocalSpaceArg, Buffer, KParam,
                                        int, int,
                                        int, int, int,
                                        int, int, int
                                       >(*convKernels[device]);

        convOp(EnqueueArgs(getQueue(), param.global, param.local),
                *out.data, out.info, *signal.data, signal.info, cl::Local(param.loc_size),
                *param.impulse, filter.info, param.nBBS0, param.nBBS1,
                param.o[0], param.o[1], param.o[2], param.s[0], param.s[1], param.s[2]);

    } catch (const cl::Error &err) {
        // Caught by const reference: no copy of the exception object.
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Exemplo n.º 18
0
// Launches the "susan_responses" kernel for an idim0 x idim1 input.
// The kernel is compiled once per device / type / radius and kept in the
// kernel cache. The global range covers the image minus `edge` pixels on
// each side, rounded up to whole work-groups.
//
// in_off, t, g and edge are forwarded to the kernel unchanged.
void susan(cl::Buffer* out, const cl::Buffer* in, const unsigned in_off,
           const unsigned idim0, const unsigned idim1, const float t,
           const float g, const unsigned edge) {
    std::string kernelKey("susan_responses_");
    kernelKey += dtype_traits<T>::getName();
    kernelKey += std::to_string(radius);

    int device       = getActiveDeviceId();
    kc_entry_t entry = kernelCache(device, kernelKey);

    // Empty cache entry: build the program and register the kernel.
    if (entry.prog == 0 && entry.ker == 0) {
        // Work-group tile plus a border of `radius` on every side.
        const size_t LOCAL_MEM_SIZE =
            (SUSAN_THREADS_X + 2 * radius) * (SUSAN_THREADS_Y + 2 * radius);
        const bool needsDouble =
            std::is_same<T, double>::value || std::is_same<T, cdouble>::value;

        std::ostringstream buildOpts;
        buildOpts << " -D T=" << dtype_traits<T>::getName()
                  << " -D LOCAL_MEM_SIZE=" << LOCAL_MEM_SIZE
                  << " -D BLOCK_X=" << SUSAN_THREADS_X
                  << " -D BLOCK_Y=" << SUSAN_THREADS_Y
                  << " -D RADIUS=" << radius
                  << " -D RESPONSE";
        if (needsDouble) {
            buildOpts << " -D USE_DOUBLE";
        }

        const char* ker_strs[] = {susan_cl};
        const int ker_lens[]   = {susan_cl_len};
        Program prog;
        buildProgram(prog, 1, ker_strs, ker_lens, buildOpts.str());
        entry.prog = new Program(prog);
        entry.ker  = new Kernel(*entry.prog, "susan_responses");

        addKernelToCache(device, kernelKey, entry);
    }

    auto launch = KernelFunctor<Buffer, Buffer, unsigned, unsigned, unsigned,
                                float, float, unsigned>(*entry.ker);

    NDRange local(SUSAN_THREADS_X, SUSAN_THREADS_Y);
    NDRange global(divup(idim0 - 2 * edge, local[0]) * local[0],
                   divup(idim1 - 2 * edge, local[1]) * local[1]);

    launch(EnqueueArgs(getQueue(), global, local),
           *out, *in, in_off, idim0, idim1, t, g, edge);
}
Exemplo n.º 19
0
// In-place Cholesky factorisation of `in` using magma_potrf_gpu.
//
// @param in        matrix to factorise; overwritten by the routine
// @param is_upper  selects MagmaUpper (true) or MagmaLower (false)
// @return the `info` value written by magma_potrf_gpu
//
// OpenCL errors are converted with CL_TO_AF_ERROR.
int cholesky_inplace(Array<T> &in, const bool is_upper)
{
    // Declared outside the try block so that every path out of the function
    // returns a value, including the one that leaves the catch handler.
    int info = 0;

    try {
        initBlas();

        dim4 iDims = in.dims();
        int N = iDims[0];

        magma_uplo_t uplo = is_upper ? MagmaUpper : MagmaLower;

        cl::Buffer *in_buf = in.get();
        magma_potrf_gpu<T>(uplo, N,
                           (*in_buf)(), in.getOffset(),  in.strides()[1],
                           getQueue()(), &info);
    } catch (const cl::Error &err) {
        CL_TO_AF_ERROR(err);
    }

    return info;
}
Exemplo n.º 20
0
// Solves a linear system from an existing LU factorisation.
// `b` is copied and a task calling getrs_func on A, the pivot array and the
// copy is enqueued on the backend queue; the copy is returned.
// `options` is not used by this implementation.
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                 const Array<T> &b, const af_mat_prop options)
{
    A.eval();
    pivot.eval();
    b.eval();

    const int order    = A.dims()[0];
    const int rhsCount = b.dims()[1];
    Array<T> solution  = copyArray<T>(b);

    // Everything the task needs is passed as an argument to the lambda.
    auto task = [=] (Array<T> lu, Array<T> rhs, Array<int> piv, int n, int nrhs) {
        getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
                        n, nrhs,
                        lu.get(), lu.strides()[1],
                        piv.get(),
                        rhs.get(), rhs.strides()[1]);
    };
    getQueue().enqueue(task, A, solution, pivot, order, rhsCount);

    return solution;
}
Exemplo n.º 21
0
/* Input:   q       - a queue that must be empty on entry
 *          s       - the puzzle to solve
 *          guesses - receives the number of guessing rounds performed
 * Returns: the solved board, or NULL when q was not empty on entry, when
 *          no board can be pulled from the queue, or when guess() reports
 *          failure.
 *    -----------------------------------------------------------
 * Backtracking solver. The puzzle is first placed in the queue. Each
 * iteration pulls one board out of the queue, calls reduce() on it and
 * inspects the result of checkSudoku():
 *    1  -> the board is returned as the solution,
 *   -1  -> the board is deleted,
 *   any other value -> guess() is called with the queue and the board, the
 *          board is deleted and the guess counter is incremented.
 * When the global `verbose` is set, the screen is cleared and the board is
 * printed after every reduction.
 *
 * The two error messages are printed unconditionally.
 */
sudoku solve(queue q, sudoku s, int * guesses)
{
	int status;

	if (!isEmptyQueue(q)) {
		printf("Error: call to solve with a non-empty queue");
		return NULL;
	}

	/* Seed the queue with the initial puzzle. */
	putQueue(q, (void *) s);
	*guesses = 0;

	for (;;) {
		/* A non-zero result means no board could be pulled out. */
		if (getQueue(q, (void **) &s)) {
			return NULL;
		}

		reduce(s);

		if (verbose) {
			system("clear");
			printSudoku(s, pretty);
			printf("\n");
		}

		status = checkSudoku(s);

		if (status == 1) {
			return s;
		}

		if (status == -1) {
			deleteSudoku(s);
			continue;
		}

		if (guess(q, s)) {
			printf("Error: Full queue");
			return NULL;
		}
		deleteSudoku(s);
		(*guesses)++;
	}
}
Exemplo n.º 22
0
// Advances the particle simulation by one frame.
//
// Sequence: acquire the GL particle buffer for OpenCL, run the kernel with
// `frametime` as argument 3, copy the CL particle buffer into the GL
// buffer, then release the GL buffer again. Pressing F re-runs init();
// pressing H toggles m_disableVelocity, which forces frametime to 0.
//
// Every OpenCL call that returns a status is checked and failures are
// reported on std::cout; the function continues after a failure.
void EmitterSystem::update(Camera & camera, float frametime)
{
	// Direction derived from the mouse position.
	// NOTE(review): `dir` is not read again after it is computed here.
	Vector3 near = camera.unProject(Mouse::getPosition(), 0.f);
	Vector3 far = camera.unProject(Mouse::getPosition(), 1.f);
	Vector3 dir = (far - near);
	dir.normalize();
	dir.x *= lerp(1.f, 4.f, std::abs(Mouse::getPosition().x - 0.5f) / 0.5f);
	dir.y *= lerp(1.f, 4.f, std::abs(Mouse::getPosition().y - 0.5f) / 0.5f);

	cl::Event event;
	cl_int err = 0;
	// Let OpenGL finish before OpenCL takes over the shared buffer.
	glFinish();

	if (Keyboard::isKeyPressed(GLFW_KEY_F))
		init();
	if (Keyboard::isKeyPressed(GLFW_KEY_H))
		m_disableVelocity = !m_disableVelocity;

	if (m_disableVelocity)
		frametime = 0.f;

	std::vector<cl::Memory> buffers;
	buffers.push_back(m_glBuffer[Index::Particles]);
	err = getQueue().enqueueAcquireGLObjects(&buffers, NULL, NULL);
	if (err != CL_SUCCESS)
		std::cout << "Failed acquiring GL object : " << getError(err) << std::endl;
	err = getQueue().finish();
	if (err != CL_SUCCESS)
		std::cout << "ERROR" << std::endl;

	err = getKernel().setArg(3, frametime);
	if (err != CL_SUCCESS)
		std::cout << "ERROR kernel args" << std::endl;

	err = getQueue().enqueueNDRangeKernel(getKernel(), cl::NullRange, cl::NDRange(m_particleCount), cl::NullRange, NULL, &event);
	if (err != CL_SUCCESS)
		std::cout << "Failed enqueueing kernel : " << getError(err) << std::endl;
	err = event.wait();
	if (err != CL_SUCCESS)
		std::cout << "ERROR" << std::endl;

	// Previously the status of this copy was dropped silently.
	err = getQueue().enqueueCopyBuffer(m_clBuffer[Index::Particles], m_glBuffer[Index::Particles], 0, 0, m_particleCount * sizeof(Particle), NULL, NULL);
	if (err != CL_SUCCESS)
		std::cout << "Failed copying particle buffer : " << getError(err) << std::endl;
	err = getQueue().enqueueReleaseGLObjects(&buffers, NULL, NULL);
	if (err != CL_SUCCESS)
		std::cout << "Failed releasing GL object : " << getError(err) << std::endl;
	// Previously the status of the final finish() was dropped silently.
	err = getQueue().finish();
	if (err != CL_SUCCESS)
		std::cout << "ERROR" << std::endl;
}
Exemplo n.º 23
0
// Solves a linear system from an existing LU factorisation using
// magma_getrs_gpu. The pivot array is first copied into a host vector,
// `b` is copied, and the routine overwrites that copy, which is returned.
// `options` is not used, and the `info` value written by the routine is
// not inspected.
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                 const Array<T> &b, const af_mat_prop options)
{
    const int order    = A.dims()[0];
    const int rhsCount = b.dims()[1];

    // Host-side copy of the pivot indices for the magma call.
    std::vector<int> hostPivot(order);
    copyData(hostPivot.data(), pivot);

    Array< T > solution = copyArray<T>(b);

    const cl::Buffer *lhsBuffer = A.get();
    cl::Buffer *rhsBuffer       = solution.get();

    int status = 0;
    magma_getrs_gpu<T>(MagmaNoTrans, order, rhsCount,
                       (*lhsBuffer)(), A.getOffset(), A.strides()[1],
                       hostPivot.data(),
                       (*rhsBuffer)(), solution.getOffset(), solution.strides()[1],
                       getQueue()(), &status);
    return solution;
}
Exemplo n.º 24
0
// Solves a triangular system with trtrs_func.
// `b` is copied and a task operating on A and the copy is enqueued on the
// backend queue; the copy is returned. `options` selects upper vs. lower
// (AF_MAT_UPPER) and unit vs. non-unit diagonal (AF_MAT_DIAG_UNIT).
Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
{
    A.eval();
    b.eval();

    Array<T> solution  = copyArray<T>(b);
    const int order    = solution.dims()[0];
    const int rhsCount = solution.dims()[1];

    // Everything the task needs is passed as an argument to the lambda.
    auto task = [=] (Array<T> lhs, Array<T> rhs, int n, int nrhs, const af_mat_prop opts) {
        const char uplo = opts & AF_MAT_UPPER ? 'U' : 'L';
        const char diag = opts & AF_MAT_DIAG_UNIT ? 'U' : 'N';

        trtrs_func<T>()(AF_LAPACK_COL_MAJOR,
                        uplo,
                        'N', // transpose flag
                        diag,
                        n, nrhs,
                        lhs.get(), lhs.strides()[1],
                        rhs.get(), rhs.strides()[1]);
    };
    getQueue().enqueue(task, A, solution, order, rhsCount, options);

    return solution;
}
Exemplo n.º 25
0
// Builds an Array of the given dimensions (strides from calcStrides, zero
// offset) from `in_data`.
//
// - is_device && !copy_device: `in_data` is adopted as the array's storage
//   and later released through memFree<T>.
// - otherwise: storage for dims.elements() elements is allocated and the
//   input is copied into it after the queue has been synchronised.
Array<T>::Array(const dim4 &dims, T *const in_data, bool is_device,
                bool copy_device)
    : info(getActiveDeviceId(), dims, 0, calcStrides(dims),
           (af_dtype)dtype_traits<T>::af_type)
    // Logical && (not bitwise &): both operands are plain bools.
    , data((is_device && !copy_device) ? (T *)in_data
                                       : memAlloc<T>(dims.elements()).release(),
           memFree<T>)
    , data_dims(dims)
    , node(bufferNodePtr<T>())
    , ready(true)
    , owner(true) {
    static_assert(is_standard_layout<Array<T>>::value,
                  "Array<T> must be a standard layout type");
    static_assert(
        offsetof(Array<T>, info) == 0,
        "Array<T>::info must be the first member variable of Array<T>");
    if (!is_device || copy_device) {
        // Ensure the memory being written to isnt used anywhere else.
        getQueue().sync();
        copy(in_data, in_data + dims.elements(), data.get());
    }
}
Exemplo n.º 26
0
// LU-factorises `in` and returns the factors separately.
// A copy of `in` is factorised in place by lu_inplace (which yields the
// pivot array); `lower` (M x min(M,N)) and `upper` (min(M,N) x N) are then
// freshly allocated and filled by the enqueued lu_split kernel.
void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot,
        const Array<T> &in) {
    lower.eval();
    upper.eval();
    pivot.eval();
    in.eval();

    const dim4 inDims = in.dims();
    int rows          = inDims[0];
    int cols          = inDims[1];

    // Factorise a copy so the caller's input stays untouched.
    Array<T> factored = copyArray<T>(in);
    pivot             = lu_inplace(factored);

    // SPLIT into lower and upper
    dim4 lowerDims(rows, min(rows, cols));
    dim4 upperDims(min(rows, cols), cols);
    lower = createEmptyArray<T>(lowerDims);
    upper = createEmptyArray<T>(upperDims);

    getQueue().enqueue(kernel::lu_split<T>, lower, upper, factored);
}
Exemplo n.º 27
0
    // Computes the intersection of two sets with compute::set_intersection.
    //
    // When `is_unique` is false both inputs are first passed through
    // setUnique. The output is allocated with the larger of the two input
    // lengths and then resized to the number of elements actually written.
    // Double-precision types are rejected when the active device does not
    // support doubles.
    //
    // NOTE(review): the arrays returned by setUnique are dereferenced and
    // copied; whether the returned pointers need releasing is not visible
    // here -- confirm ownership.
    Array<T>* setIntersect(const Array<T> &first,
                           const Array<T> &second,
                           const bool is_unique)
    {
        const bool isDoubleType =
            std::is_same<T, double>::value || std::is_same<T, cdouble>::value;
        if (isDoubleType && !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }

        Array<T> lhs = first;
        Array<T> rhs = second;

        if (!is_unique) {
            lhs = *setUnique(first, false);
            rhs = *setUnique(second, false);
        }

        size_t capacity = std::max(lhs.dims()[0], rhs.dims()[0]);
        Array<T> *result = createEmptyArray<T>(dim4(capacity, 1, 1, 1));

        compute::command_queue queue(getQueue()());

        // Wrap the underlying OpenCL buffers for use with the compute library.
        compute::buffer lhs_buf((*lhs.get())());
        compute::buffer rhs_buf((*rhs.get())());
        compute::buffer out_buf((*result->get())());

        compute::buffer_iterator<T> lhs_first(lhs_buf, 0);
        compute::buffer_iterator<T> lhs_last(lhs_buf, lhs.dims()[0]);
        compute::buffer_iterator<T> rhs_first(rhs_buf, 0);
        compute::buffer_iterator<T> rhs_last(rhs_buf, rhs.dims()[0]);
        compute::buffer_iterator<T> out_first(out_buf, 0);

        compute::buffer_iterator<T> out_last = compute::set_intersection(
            lhs_first, lhs_last, rhs_first, rhs_last, out_first, queue
        );

        // Shrink the logical size to the number of elements produced.
        result->resetDims(dim4(std::distance(out_first, out_last), 1, 1, 1));

        return result;
    }
/*
 * Unit test for pushToQueue: pushes two upgrade commands and one
 * make-tower command and checks the reported queue length as well as the
 * first/last command and option after each step. Clears the queue at the end.
 */
void testPushToQueue()	{

    ActionQueueStructure q = getQueue(NULL);

    /* First item: upgrade power on target 1. */
    cmdType firstCmd = cmd_upgrade;
    cmdOption firstOpt = upgrade_power;
    int firstTarget = 1;

    /* Second item: upgrade range on target 2. */
    cmdType secondCmd = cmd_upgrade;
    cmdOption secondOpt = upgrade_range;
    int secondTarget = 2;

    sput_fail_unless(pushToQueue(q,firstCmd,firstOpt,firstTarget) == 1,"Valid: 1 Queue Item");
    sput_fail_unless(pushToQueue(q,secondCmd,secondOpt,secondTarget) == 2,"Valid: 2 Queue Items");

    sput_fail_unless(getFirstCommand(q) == cmd_upgrade,"Valid: Top of Queue Upgrade Command");
    sput_fail_unless(getFirstOption(q) == upgrade_power,"Valid: Top of Queue Power Option");
    sput_fail_unless(getLastCommand(q) == cmd_upgrade,"Valid: Last in Queue upgrade Command");
    sput_fail_unless(getLastOption(q) == upgrade_range,"Valid: Last of Queue range Option");

    /* Third item: make an int tower; it must become the new tail. */
    pushToQueue(q,cmd_mktwr,mktwr_int,2);
    sput_fail_unless(getLastCommand(q) == cmd_mktwr,"Valid: Last in Queue make tower command");
    sput_fail_unless(getLastOption(q) == mktwr_int,"Valid: Last option in Queue is int tower");

    clearQueue();
}
Exemplo n.º 29
0
// Dot product of `lhs` and `rhs` through the BLAS dot routine.
// The result is written into a one-element array, which is returned. A
// scratch buffer of N elements is allocated for the call, where N is the
// first dimension of `lhs`. A non-zero status raises a runtime_error.
// optLhs and optRhs are not used by this implementation.
Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
             af_blas_transpose optLhs, af_blas_transpose optRhs)
{
    initBlas();

    const int length = lhs.dims()[0];

    dot_func<T> blasDot;
    cl::Event done;
    auto result = createEmptyArray<T>(af::dim4(1));

    // Temporary device memory handed to the BLAS call.
    cl::Buffer workspace(getContext(), CL_MEM_READ_WRITE, sizeof(T) * length);

    clblasStatus status = blasDot(length,
                                  (*result.get())(), result.getOffset(),
                                  (*lhs.get())(),  lhs.getOffset(), lhs.strides()[0],
                                  (*rhs.get())(),  rhs.getOffset(), rhs.strides()[0],
                                  workspace(),
                                  1, &getQueue()(), 0, nullptr, &done());

    if (status) {
        throw runtime_error(std::string("CLBLAS error: ") + std::to_string(status));
    }

    return result;
}
Exemplo n.º 30
0
/**
 * Converts a sparse array in CSR storage into a dense array.
 *
 * The dense result is created filled with zero and a task running the
 * csr_dense kernel is pushed onto the backend queue to fill it.
 *
 * `stype` is declared outside this excerpt (presumably a template
 * parameter -- TODO confirm). Any value other than AF_STORAGE_CSR is
 * reported through AF_ERROR before anything is allocated or enqueued.
 *
 * @param in_ sparse input array
 * @return dense array populated by the enqueued task
 */
Array<T> sparseConvertStorageToDense(const SparseArray<T> &in_)
{
    // Fail fast, and describe the direction of this conversion: the
    // previous message was copied from the dense-to-sparse routine.
    if (stype != AF_STORAGE_CSR) {
        AF_ERROR("CPU Backend only supports CSR or COO to Dense", AF_ERR_NOT_SUPPORTED);
    }

    in_.eval();

    Array<T> dense_ = createValueArray<T>(in_.dims(), scalar<T>(0));
    dense_.eval();

    auto func = [=] (Array<T> dense, const SparseArray<T> in) {
        Array<T  > values = in.getValues();
        Array<int> rowIdx = in.getRowIdx();
        Array<int> colIdx = in.getColIdx();

        kernel::csr_dense<T>()(dense, values, rowIdx, colIdx);
    };

    getQueue().enqueue(func, dense_, in_);

    return dense_;
}