Esempio n. 1
0
/**
 * Host-side driver that reduces `in` (with `inWeight`) along dimension 0 by
 * calling mean_first_launcher.
 *
 * When one group along dimension 0 is enough, a single launch writes directly
 * into `out`. Otherwise the first launch writes one partial result and one
 * partial weight per group into temporary buffers, and a second launch with a
 * single group along dimension 0 folds those partials into `out`.
 *
 * @param out      destination of the reduction
 * @param in       input values
 * @param inWeight weights handed to the first launch together with `in`
 */
void mean_first(Param out, Param in, Param inWeight)
{
    // Threads along dim 0: nextpow2(max(32, dims[0])), capped at
    // THREADS_PER_GROUP. Dim 1 receives the rest of the group.
    uint threads_x = nextpow2(std::max(32u, (uint)in.info.dims[0]));
    threads_x      = std::min(threads_x, THREADS_PER_GROUP);
    uint threads_y = THREADS_PER_GROUP / threads_x;

    uint groups_x = divup(in.info.dims[0], threads_x * REPEAT);
    uint groups_y = divup(in.info.dims[1], threads_y);

    const bool twoPass = (groups_x > 1);

    // Placeholder weight descriptor: zeroed geometry. The handle is borrowed
    // from inWeight only so that it is valid; its contents are not meant to
    // be used.
    Param noWeight;
    noWeight.info.offset = 0;
    for (int d = 0; d != 4; d++) {
        noWeight.info.dims[d]    = 0;
        noWeight.info.strides[d] = 0;
    }
    noWeight.data = inWeight.data;

    Param tmpOut    = out;
    Param tmpWeight = noWeight;

    if (twoPass) {
        // One partial per group along dim 0, for every slice of dims 1..3.
        tmpOut.data = bufferAlloc(groups_x *
                                  in.info.dims[1] *
                                  in.info.dims[2] *
                                  in.info.dims[3] *
                                  sizeof(To));

        tmpWeight.data = bufferAlloc(groups_x *
                                     in.info.dims[1] *
                                     in.info.dims[2] *
                                     in.info.dims[3] *
                                     sizeof(Tw));

        // The temporaries reuse out's descriptor with dim 0 widened to
        // groups_x, so the higher strides scale by the same factor.
        tmpOut.info.dims[0] = groups_x;
        for (int d = 1; d != 4; d++) {
            tmpOut.info.strides[d] *= groups_x;
        }
        tmpWeight.info = tmpOut.info;
    }

    mean_first_launcher<Ti, Tw, To>(tmpOut, tmpWeight, in, inWeight, threads_x, groups_x, groups_y);

    if (!twoPass) {
        return;
    }

    // Second pass: reduce the partials. No weight output is needed when
    // writing the final result.
    mean_first_launcher<Ti, Tw, To>(out, noWeight, tmpOut, tmpWeight, threads_x, 1, groups_y);

    bufferFree(tmpOut.data);
    bufferFree(tmpWeight.data);
}
Esempio n. 2
0
/**
 * Attach a content body to an RTSP request.
 *
 * Adds the "Content-Type" and "Content-Length" header fields, releases any
 * previously attached content buffer and stores a private copy of `content`.
 *
 * @param rtspRequest  request to modify
 * @param content      bytes to copy; may be NULL only when contentSize is 0
 * @param contentSize  number of bytes in `content`
 * @param contentType  value for the "Content-Type" header field
 * @return true on success; false if the arguments are inconsistent, a header
 *         field could not be added, or a buffer could not be freed/allocated
 */
bool rtspRequestSetContent(RTSPRequest *rtspRequest, uint8_t *content, size_t contentSize, char *contentType) {
	char contentSizeString[MAX_NUMBER_STRING_SIZE];
	int charsWritten;

	/* Reject a missing payload before any header is modified (memcpy from NULL is undefined) */
	if(content == NULL && contentSize > 0) {
		return false;
	}

	/* Add header field for content type */
	if(!rtspRequestAddHeaderField(rtspRequest, "Content-Type", contentType)) {
		return false;
	}

	/* Add header field for content size. Bounded write: fail instead of overflowing or truncating. */
	charsWritten = snprintf(contentSizeString, sizeof(contentSizeString), "%lu", (unsigned long)contentSize);
	if(charsWritten < 0 || (size_t)charsWritten >= sizeof(contentSizeString)) {
		return false;
	}
	if(!rtspRequestAddHeaderField(rtspRequest, "Content-Length", contentSizeString)) {
		return false;
	}

	/* Free any existing buffer */
	if(rtspRequest->contentBuffer != NULL) {
		if(!bufferFree(&rtspRequest->contentBuffer)) {
			return false;
		}
		/* Old content is gone; do not keep advertising its size if allocation fails below */
		rtspRequest->contentBufferSize = 0;
	}

	/* Allocate buffer */
	if(!bufferAllocate(&rtspRequest->contentBuffer, contentSize, "RTSP request content buffer")) {
		return false;
	}

	/* Copy buffer (nothing to copy for an empty body) */
	if(contentSize > 0) {
		memcpy(rtspRequest->contentBuffer, content, contentSize);
	}
	rtspRequest->contentBufferSize = contentSize;

	return true;
}
Esempio n. 3
0
    /**
     * Host-side driver for a scan of `in` along dimension `dim`, written to
     * `out`.
     *
     * If one group covers the scanned dimension, a single launch of
     * scan_dim_launcher finishes the job. Otherwise three steps run:
     *   1. scan each group's chunk, storing one value per group in a
     *      temporary buffer,
     *   2. scan that temporary in place with a single group,
     *   3. combine the temporary with `out` through bcast_dim_launcher.
     * The temporary buffer is released before returning.
     */
    static void scan_dim(Param &out, const Param &in, int dim)
    {
        uint threads_x = THREADS_X;
        uint threads_y = std::min(THREADS_Y, nextpow2(out.info.dims[dim]));

        uint groups_all[] = {divup((uint)out.info.dims[0], threads_x),
                             (uint)out.info.dims[1],
                             (uint)out.info.dims[2],
                             (uint)out.info.dims[3]};

        // Along the scanned dimension each thread handles REPEAT elements.
        groups_all[dim] = divup(out.info.dims[dim], threads_y * REPEAT);

        // Single-group case: one launch, nothing else to do.
        if (groups_all[dim] == 1) {
            scan_dim_launcher<Ti, To, op, inclusive_scan>(out, out, in,
                                                          dim, true,
                                                          threads_y,
                                                          groups_all);
            return;
        }

        // Temporary array shaped like out, except that the scanned dimension
        // holds one entry per group. Strides are rebuilt as packed.
        Param partials = out;
        partials.info.dims[dim]  = groups_all[dim];
        partials.info.strides[0] = 1;
        for (int d = 1; d < 4; ++d) {
            partials.info.strides[d] =
                partials.info.strides[d - 1] * partials.info.dims[d - 1];
        }

        int partial_count = partials.info.strides[3] * partials.info.dims[3];
        partials.data     = bufferAlloc(partial_count * sizeof(To));

        // Step 1: per-group scan.
        scan_dim_launcher<Ti, To, op, inclusive_scan>(out, partials, in,
                                                      dim, false,
                                                      threads_y,
                                                      groups_all);

        // Step 2: scan the per-group values with a single group along dim.
        // af_notzero_t values are combined with af_add_t at this stage.
        int groups_along_dim = groups_all[dim];
        groups_all[dim]      = 1;

        if (op == af_notzero_t) {
            scan_dim_launcher<To, To, af_add_t, true>(partials, partials, partials,
                                                      dim, true,
                                                      threads_y,
                                                      groups_all);
        } else {
            scan_dim_launcher<To, To, op, true>(partials, partials, partials,
                                                dim, true,
                                                threads_y,
                                                groups_all);
        }

        // Step 3: restore the original group count and combine into out.
        groups_all[dim] = groups_along_dim;
        bcast_dim_launcher<To, To, op, inclusive_scan>(out, partials,
                                                       dim, true,
                                                       threads_y,
                                                       groups_all);

        bufferFree(partials.data);
    }
Esempio n. 4
0
/*
 * Release the protocol's global message buffer.
 *
 * None of the parameters are used; they are only marked as unused.
 * Returns the result of bufferFree() on gMessageBuffer.
 */
bool protocolCleanup(int pSocket, fd_set *pSocketSet, int pMaxSocket, int pBufferSize)
{
  bool released;

  UNUSED(pSocket);
  UNUSED(pSocketSet);
  UNUSED(pMaxSocket);
  UNUSED(pBufferSize);

  released = bufferFree(&gMessageBuffer);
  return released;
}
Esempio n. 5
0
/**
 * Tears down a peer proxy after its peer has died.
 *
 * Logs the death (with strerror(errno) when errnoIsSet is true), aborts via
 * LOG_ALWAYS_FATAL if the proxy is the master, records the pid in the local
 * peer's dead-peer history, removes the proxy from the peer map, flags its fd
 * for removal, drains queued packets, frees the input buffer and connection
 * map, invokes the death listener and finally frees the proxy itself.
 */
static void peerProxyKill(PeerProxy* peerProxy, bool errnoIsSet) {
    Peer* localPeer = peerProxy->peer;
    pid_t pid = peerProxy->credentials.pid;

    if (!errnoIsSet) {
        ALOGI("Peer %d died.", peerProxy->credentials.pid);
    } else {
        ALOGI("Peer %d died. errno: %s", peerProxy->credentials.pid,
                strerror(errno));
    }

    // Losing the master is unrecoverable.
    if (peerProxy->master) {
        LOG_ALWAYS_FATAL("Lost connection to master.");
    }

    peerLock(localPeer);

    // Record the pid in the history and advance the cursor, wrapping to 0
    // once it reaches PEER_HISTORY.
    localPeer->deadPeers[localPeer->deadPeerCursor++] = pid;
    if (localPeer->deadPeerCursor == PEER_HISTORY) {
        localPeer->deadPeerCursor = 0;
    }

    // Drop the proxy from the peer map.
    hashmapRemove(localPeer->peerProxies, &pid);

    // With the map entry gone, other threads can no longer reach this proxy,
    // so the rest of the teardown runs without the lock.
    peerUnlock(localPeer);

    // Ask the selector to drop the fd.
    if (peerProxy->fd != NULL) {
        peerProxy->fd->remove = true;
    }

    // Drain the outgoing packet queue.
    while (peerProxyNextPacket(peerProxy)) {
        continue;
    }

    bufferFree(peerProxy->inputBuffer);

    // Only the master keeps a connections map.
    if (peerProxy->connections != NULL) {
        // Other maps must not keep pointing at memory that is about to be
        // freed.
        hashmapForEach(peerProxy->connections, &peerProxyRemoveConnection,
                peerProxy);
        hashmapFree(peerProxy->connections);
    }

    // Notify the death listener.
    localPeer->onDeath(pid);

    // Release the proxy itself.
    free(peerProxy);
}
/**
 * Host-side launcher for the "convolve" kernel built from
 * convolve_separable_cl.
 *
 * The program is compiled once per device (guarded by std::call_once) with
 * the template parameters baked in as -D options. The filter is copied into a
 * temporary buffer of fLen accType elements, the kernel is enqueued, and the
 * temporary buffer is released, including when an enqueue throws.
 *
 * @param out     destination array
 * @param signal  input array; dims[2] and dims[3] scale the global range
 * @param filter  filter whose first fLen*sizeof(accType) bytes are copied
 * @throws whatever CL_TO_AF_ERROR raises for a cl::Error (otherwise the
 *         cl::Error is rethrown)
 */
void convolve2(Param out, const Param signal, const Param filter)
{
    try {
        static std::once_flag  compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*>   convProgs;
        static std::map<int, Kernel*>  convKernels;

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {
                // Local memory tile: work-group footprint plus a halo of
                // (fLen-1) on both sides of the convolved dimension.
                const size_t C0_SIZE  = (THREADS_X+2*(fLen-1))* THREADS_Y;
                const size_t C1_SIZE  = (THREADS_Y+2*(fLen-1))* THREADS_X;

                size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);

                    std::ostringstream options;
                    options << " -D T=" << dtype_traits<T>::getName()
                            << " -D accType="<< dtype_traits<accType>::getName()
                            << " -D CONV_DIM="<< conv_dim
                            << " -D EXPAND="<< expand
                            << " -D FLEN="<< fLen
                            << " -D LOCAL_MEM_SIZE="<<locSize;
                    if (std::is_same<T, double>::value ||
                        std::is_same<T, cdouble>::value) {
                        options << " -D USE_DOUBLE";
                    }
                    Program prog;
                    buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
                    convProgs[device]   = new Program(prog);
                    convKernels[device] = new Kernel(*convProgs[device], "convolve");
                });

        auto convOp = make_kernel<Buffer, KParam, Buffer, KParam, Buffer,
                                  int, int>(*convKernels[device]);

        NDRange local(THREADS_X, THREADS_Y);

        int blk_x = divup(out.info.dims[0], THREADS_X);
        int blk_y = divup(out.info.dims[1], THREADS_Y);

        // Batches along dims 2 and 3 are folded into the x and y ranges.
        NDRange global(blk_x*signal.info.dims[2]*THREADS_X,
                       blk_y*signal.info.dims[3]*THREADS_Y);

        cl::Buffer *mBuff = bufferAlloc(fLen*sizeof(accType));
        try {
            // FIX ME: if the filter array is strided, direct might cause issues
            getQueue().enqueueCopyBuffer(*filter.data, *mBuff, 0, 0, fLen*sizeof(accType));

            convOp(EnqueueArgs(getQueue(), global, local),
                   *out.data, out.info, *signal.data, signal.info, *mBuff, blk_x, blk_y);
        } catch (...) {
            // Do not leak the staging buffer when an enqueue fails.
            bufferFree(mBuff);
            throw;
        }

        bufferFree(mBuff);
    } catch (const cl::Error &err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Esempio n. 7
0
/*
 * Release an RTSP request together with its header and content buffers.
 *
 * Every release is attempted even if an earlier one fails; the failure is
 * remembered and reported through the return value. Nothing is done (and true
 * is returned) when *rtspRequest is NULL.
 */
bool rtspRequestFree(RTSPRequest **rtspRequest) {
	bool allReleased = true;

	/* Nothing allocated, nothing to release */
	if(*rtspRequest == NULL) {
		return allReleased;
	}

	/* Buffers owned by the request come first, then the request itself */
	if(!bufferFree(&(*rtspRequest)->headerBuffer)) {
		allReleased = false;
	}
	if(!bufferFree(&(*rtspRequest)->contentBuffer)) {
		allReleased = false;
	}
	if(!bufferFree(rtspRequest)) {
		allReleased = false;
	}

	return allReleased;
}
Esempio n. 8
0
/**
 * Gather elements of `in` selected by up to four indexers.
 *
 * Each entry of `idxrs` is either a sequence (isSeq) or an index array. The
 * sequences determine offsets and strides; index arrays are cast to uint and
 * their element count becomes the output size along that dimension.
 *
 * Placeholder 1-element buffers for the sequence dimensions are allocated
 * only after the output is known to be non-empty, so the early return for an
 * empty result no longer leaks them.
 *
 * @param in     source array
 * @param idxrs  array of four indexers (one per dimension)
 * @return the indexed array (empty when any output dimension is zero)
 */
Array<T> index(const Array<T>& in, const af_index_t idxrs[])
{
    kernel::IndexKernelParam_t p;
    std::vector<af_seq> seqs(4, af_span);
    // create seq vector to retrieve output
    // dimensions, strides & offsets
    for (dim_t x=0; x<4; ++x) {
        if (idxrs[x].isSeq) {
            seqs[x] = idxrs[x].idx.seq;
        }
    }

    // retrieve dimensions, strides and offsets
    dim4 iDims = in.dims();
    dim4 dDims = in.getDataDims();
    dim4 oDims = toDims  (seqs, iDims);
    dim4 iOffs = toOffset(seqs, dDims);
    dim4 iStrds= toStride(seqs, dDims);

    for (dim_t i=0; i<4; ++i) {
        p.isSeq[i] = idxrs[i].isSeq;
        p.offs[i]  = iOffs[i];
        p.strds[i] = iStrds[i];
    }

    Buffer* bPtrs[4] = {};

    std::vector< Array<uint> > idxArrs(4, createEmptyArray<uint>(dim4()));
    // look through indexers to read af_array indices
    for (dim_t x=0; x<4; ++x) {
        // set index pointers where applicable
        if (!p.isSeq[x]) {
            idxArrs[x] = castArray<uint>(idxrs[x].idx.arr);
            bPtrs[x] = idxArrs[x].get();
            // set output array ith dimension value
            oDims[x] = idxArrs[x].elements();
        }
    }

    Array<T> out = createEmptyArray<T>(oDims);
    // Nothing has been allocated for the sequence dimensions yet, so this
    // early return has nothing to clean up.
    if(oDims.elements() == 0) { return out; }

    for (dim_t x=0; x<4; ++x) {
        // alloc a 1-element buffer to avoid OpenCL from failing
        if (p.isSeq[x]) bPtrs[x] = bufferAlloc(sizeof(uint));
    }

    kernel::index<T>(out, in, p, bPtrs);

    for (dim_t x=0; x<4; ++x) {
        if (p.isSeq[x]) bufferFree(bPtrs[x]);
    }

    return out;
}
Esempio n. 9
0
/*
* Destructor
*
* Passes the address of each of the manager's seven sensor/alert buffers
* (GPS, linear and angular acceleration, humidity, temperature, light, alert)
* to bufferFree(), in that order.
*/
BufferManager::~BufferManager()
{
	bufferFree(&bufferGPS);
	bufferFree(&bufferLinearAcceleration);
	bufferFree(&bufferAngularAcceleration);
	bufferFree(&bufferHumidity);
	bufferFree(&bufferTemperature);
	bufferFree(&bufferLight);
	bufferFree(&bufferAlert);
}
Esempio n. 10
0
/**
 * Host-side launcher for the "morph3d" kernel built from morph_cl.
 *
 * The compiled program/kernel pair is looked up in the kernel cache under a
 * key made of the element type name, the dilation flag and SeLength, and is
 * built and cached on a miss. The mask is copied into a temporary buffer that
 * is released after the kernel has been enqueued.
 *
 * @param out   destination array
 * @param in    input array; dims[3] is folded into the x range as batch
 * @param mask  structuring element; sizeof(T)*SeLength^3 bytes are copied
 */
void morph3d(Param       out,
        const Param      in,
        const Param      mask)
{
    std::string kernelKey("morph3d_");
    kernelKey += std::string(dtype_traits<T>::getName());
    kernelKey += std::to_string(isDilation);
    kernelKey += std::to_string(SeLength);

    int device       = getActiveDeviceId();
    kc_entry_t entry = kernelCache(device, kernelKey);

    const bool cacheMiss = (entry.prog==0 && entry.ker==0);
    if (cacheMiss) {
        std::string options = generateOptionsString<T, isDilation, SeLength>();
        const char* ker_strs[] = {morph_cl};
        const int   ker_lens[] = {morph_cl_len};

        Program prog;
        buildProgram(prog, 1, ker_strs, ker_lens, options);

        entry.prog = new Program(prog);
        entry.ker  = new Kernel(*entry.prog, "morph3d");
        addKernelToCache(device, kernelKey, entry);
    }

    auto morphOp = KernelFunctor< Buffer, KParam, Buffer, KParam, Buffer,
                                  cl::LocalSpaceArg, int >(*entry.ker);

    // Number of CUBE_X x CUBE_Y x CUBE_Z work-groups needed per volume.
    int blk_x = divup(in.info.dims[0], CUBE_X);
    int blk_y = divup(in.info.dims[1], CUBE_Y);
    int blk_z = divup(in.info.dims[2], CUBE_Z);

    NDRange local(CUBE_X, CUBE_Y, CUBE_Z);
    // The batch (dims[3]) is laid out along x: batch * blk_x groups.
    NDRange global(blk_x * CUBE_X * in.info.dims[3], blk_y * CUBE_Y, blk_z * CUBE_Z);

    // Local memory tile: the work-group cube plus padding on each axis, with
    // one extra element on the x extent.
    const int padding = (SeLength%2==0 ? (SeLength-1) : (2*(SeLength/2)));
    const int locLen  = CUBE_X+padding+1;
    const int locArea = locLen *(CUBE_Y+padding);
    const int locSize = locArea*(CUBE_Z+padding);

    // Stage the mask/filter in its own buffer for the kernel.
    cl_int se_size    = sizeof(T)*SeLength*SeLength*SeLength;
    cl::Buffer *mBuff = bufferAlloc(se_size);
    getQueue().enqueueCopyBuffer(*mask.data, *mBuff, 0, 0, se_size);

    morphOp(EnqueueArgs(getQueue(), global, local),
            *out.data, out.info, *in.data, in.info,
            *mBuff, cl::Local(locSize*sizeof(T)), blk_x);

    bufferFree(mBuff);
    CL_DEBUG_FINISH(getQueue());
}
Esempio n. 11
0
/**
 * Host-side launcher for the "non_maximal" kernel built from susan_cl.
 *
 * A device-side counter is initialised to zero, handed to the kernel, and
 * read back after the launch; its value is the function result.
 *
 * @param x_out, y_out, resp_out  output buffers passed to the kernel
 * @param idim0, idim1            input dimensions
 * @param resp_in                 response buffer read by the kernel
 * @param edge                    border width; the launch covers
 *                                (idim - 2*edge) along each dimension
 * @param max_corners             limit forwarded to the kernel
 * @return the counter value read back from the device
 */
unsigned nonMaximal(cl::Buffer* x_out, cl::Buffer* y_out, cl::Buffer* resp_out,
                    const unsigned idim0, const unsigned idim1,
                    const cl::Buffer* resp_in, const unsigned edge,
                    const unsigned max_corners) {
    std::string kernelKey = std::string("non_maximal_");
    kernelKey += std::string(dtype_traits<T>::getName());

    int device       = getActiveDeviceId();
    kc_entry_t entry = kernelCache(device, kernelKey);

    // Build and cache the program on first use for this type/device.
    if (entry.prog == 0 && entry.ker == 0) {
        std::ostringstream options;
        options << " -D T=" << dtype_traits<T>::getName() << " -D NONMAX";
        if (std::is_same<T, double>::value || std::is_same<T, cdouble>::value) {
            options << " -D USE_DOUBLE";
        }

        const char* ker_strs[] = {susan_cl};
        const int ker_lens[]   = {susan_cl_len};

        Program prog;
        buildProgram(prog, 1, ker_strs, ker_lens, options.str());
        entry.prog = new Program(prog);
        entry.ker  = new Kernel(*entry.prog, "non_maximal");

        addKernelToCache(device, kernelKey, entry);
    }

    // Device-side counter, initialised to zero from the host.
    unsigned found_count      = 0;
    cl::Buffer* d_found_count = bufferAlloc(sizeof(unsigned));
    getQueue().enqueueWriteBuffer(*d_found_count, CL_TRUE, 0,
                                  sizeof(unsigned), &found_count);

    auto nonMaximalOp =
        KernelFunctor<Buffer, Buffer, Buffer, Buffer, unsigned, unsigned,
                      Buffer, unsigned, unsigned>(*entry.ker);

    // Global range: the interior (without the edge border) rounded up to a
    // whole number of work-groups.
    NDRange local(SUSAN_THREADS_X, SUSAN_THREADS_Y);
    NDRange global(divup(idim0 - 2 * edge, local[0]) * local[0],
                   divup(idim1 - 2 * edge, local[1]) * local[1]);

    nonMaximalOp(EnqueueArgs(getQueue(), global, local), *x_out, *y_out,
                 *resp_out, *d_found_count, idim0, idim1, *resp_in, edge,
                 max_corners);

    // Read the counter back, then release it.
    getQueue().enqueueReadBuffer(*d_found_count, CL_TRUE, 0, sizeof(unsigned),
                                 &found_count);
    bufferFree(d_found_count);

    return found_count;
}
Esempio n. 12
0
/**
 * Append a "Name: Value\r\n" field to the header buffer of an RTSP request.
 *
 * The header buffer is created on first use and grown through
 * bufferMakeRoom() when the new field does not fit. The buffer always holds a
 * '\0'-terminated string; headerBufferSize counts that terminator.
 *
 * @param rtspRequest  request whose header is extended
 * @param fieldName    field name; must not be NULL
 * @param fieldValue   field value; must not be NULL
 * @return true when the field was appended; false on NULL name/value, on
 *         allocation failure (the header buffer is released in that case) or
 *         when formatting fails (the header is left as it was)
 */
bool rtspRequestAddHeaderField(RTSPRequest *rtspRequest, const char *fieldName, const char *fieldValue) {
	size_t fieldLength;
	int charsWritten;

	/* strlen() and "%s" both need valid strings */
	if(fieldName == NULL || fieldValue == NULL) {
		logWrite(LOG_LEVEL_ERROR, LOG_COMPONENT_NAME, "Cannot add field without name or value to RTSP Request header.");
		return false;
	}

	/* Allocate initial buffer if required */
	if(rtspRequest->headerBuffer == NULL) {
		rtspRequest->maxHeaderBufferSize = HEADER_BUFFER_INITIAL_SIZE;
		if(!bufferAllocate(&rtspRequest->headerBuffer, rtspRequest->maxHeaderBufferSize, "RTSP request header buffer")) {
			return false;
		}
		rtspRequest->headerBuffer[0] = '\0';
		rtspRequest->headerBufferSize = 1;	/* The '\0' byte */
	}

	/* Decide if enough space is available in buffer and add space if necessary. Add 4 bytes for ": " and "\r\n". */
	fieldLength = strlen(fieldName) + strlen(fieldValue) + 4;
	if(!bufferMakeRoom(&rtspRequest->headerBuffer, &rtspRequest->maxHeaderBufferSize, rtspRequest->headerBufferSize, fieldLength, HEADER_BUFFER_INCREMENT_SIZE)) {
		logWrite(LOG_LEVEL_ERROR, LOG_COMPONENT_NAME, "Cannot allocate memory to add field \"%s\" to RTSP Request header.", fieldName);
		bufferFree(&rtspRequest->headerBuffer);
		rtspRequest->headerBuffer = NULL;
		rtspRequest->headerBufferSize = 0;
		return false;
	}

	/* Add field name and value to buffer */
	/* Offset (headerBufferSize - 1) to overwrite existing '\0' byte. A new '\0' byte will be added at the end. */
	charsWritten = snprintf((char *)rtspRequest->headerBuffer + rtspRequest->headerBufferSize - 1, rtspRequest->maxHeaderBufferSize - (rtspRequest->headerBufferSize - 1), "%s: %s\r\n", fieldName, fieldValue);
	if(charsWritten < 0 || (size_t)charsWritten != fieldLength) {
		/* Undo any partial write so the header still ends where headerBufferSize says it does */
		rtspRequest->headerBuffer[rtspRequest->headerBufferSize - 1] = '\0';
		logWrite(LOG_LEVEL_ERROR, LOG_COMPONENT_NAME, "Cannot add field \"%s\" to RTSP Request header.", fieldName);
		return false;
	}

	/* Update buffer data */
	rtspRequest->headerBufferSize += charsWritten;

	return true;
}
Esempio n. 13
0
static void computerTransmitTaskLoop(void *parameters) {
    struct dataQueueEntry entry;

    while(1) {
        // Wait for available data
        xQueueReceive(computerOutputQueue, &entry, portMAX_DELAY);

        sendByteSerial(START_BYTE);

        unsigned char csum = 0;

        sendByteSerial(entry.length);
        csum += entry.length;

        int i;
        for (i = 0; i < entry.length; i++) {
            sendByteSerial(entry.buffer->data[i]);
            csum += entry.buffer->data[i];
        }
        sendByteSerial(255 - csum);
        bufferFree(entry.buffer);
    }
}
Esempio n. 14
0
/**
 * Host-side driver that fills `out` with a one-dimensional uint array built
 * from `in` in three stages:
 *   1. scan_first_launcher (af_notzero_t) writes per-element results to one
 *      temporary and per-group results to another,
 *   2. the per-group temporary is linearised and scanned with af_add_t,
 *   3. its last element is read back as the output length `total`, `out` is
 *      allocated, and get_out_idx fills it when total > 0.
 * Both temporaries are released before returning; `out.data` is owned by the
 * caller afterwards.
 */
static void where(Param &out, Param &in)
{
    uint threads_x = nextpow2(std::max(32u, (uint)in.info.dims[0]));
    threads_x      = std::min(threads_x, THREADS_PER_GROUP);
    uint threads_y = THREADS_PER_GROUP / threads_x;

    uint groups_x = divup(in.info.dims[0], threads_x * REPEAT);
    uint groups_y = divup(in.info.dims[1], threads_y);

    // groupTmp: one entry per group along dim 0.
    // elemTmp:  one entry per input element.
    // Both are packed, with dims 1..3 taken from the input.
    Param groupTmp;
    Param elemTmp;

    groupTmp.info.offset     = 0;
    groupTmp.info.dims[0]    = groups_x;
    groupTmp.info.strides[0] = 1;

    elemTmp.info.offset     = 0;
    elemTmp.info.dims[0]    = in.info.dims[0];
    elemTmp.info.strides[0] = 1;

    for (int d = 1; d < 4; ++d) {
        groupTmp.info.dims[d]    = in.info.dims[d];
        groupTmp.info.strides[d] = groupTmp.info.strides[d - 1] * groupTmp.info.dims[d - 1];
    }
    for (int d = 1; d < 4; ++d) {
        elemTmp.info.dims[d]    = in.info.dims[d];
        elemTmp.info.strides[d] = elemTmp.info.strides[d - 1] * elemTmp.info.dims[d - 1];
    }

    int group_elements = groupTmp.info.strides[3] * groupTmp.info.dims[3];
    groupTmp.data      = bufferAlloc(group_elements * sizeof(uint));

    int elem_elements = elemTmp.info.strides[3] * elemTmp.info.dims[3];
    elemTmp.data      = bufferAlloc(elem_elements * sizeof(uint));

    // Stage 1.
    scan_first_launcher<T, uint, af_notzero_t>(elemTmp, groupTmp, in, false, groups_x, groups_y, threads_x);

    // Stage 2: view the per-group buffer as a single row and scan it in place.
    Param flat        = groupTmp;
    flat.info.offset  = 0;
    flat.info.dims[0] = group_elements;
    for (int d = 1; d < 4; ++d) {
        flat.info.dims[d]    = 1;
        flat.info.strides[d] = group_elements;
    }

    scan_first<uint, uint, af_add_t>(flat, flat);

    // Stage 3: the last scanned element is the output length.
    uint total;
    getQueue().enqueueReadBuffer(*groupTmp.data, CL_TRUE,
                                 sizeof(uint) * (group_elements - 1),
                                 sizeof(uint),
                                 &total);

    out.data = bufferAlloc(total * sizeof(uint));

    out.info.dims[0]    = total;
    out.info.strides[0] = 1;
    for (int d = 1; d < 4; ++d) {
        out.info.dims[d]    = 1;
        out.info.strides[d] = total;
    }

    if (total > 0) {
        get_out_idx<T>(out.data, elemTmp, groupTmp, in, threads_x, groups_x, groups_y);
    }

    bufferFree(groupTmp.data);
    bufferFree(elemTmp.data);
}
Esempio n. 15
0
/**
 * Frees an outgoing packet: logs the event, releases the packet's byte buffer
 * with bufferFree(), then frees the packet structure itself. The packet
 * pointer must not be used after this call.
 */
static void outgoingPacketFreeBytes(OutgoingPacket* packet) {
    ALOGD("Freeing outgoing packet.");
    bufferFree(packet->bytes);
    free(packet);
}
Esempio n. 16
0
/**
 * Host-side launcher for the "csrmm_nt" kernel built from csrmm_cl.
 *
 * The kernel variant is selected by whether alpha differs from 1 and beta
 * differs from 0 (plus the always-false greedy flag); each variant is built
 * once and kept in the kernel cache. A zero-initialised int counter per
 * x-group is uploaded before the launch and released afterwards.
 *
 * @param out             destination buffer
 * @param values, rowIdx, colIdx  buffers forwarded to the kernel; M is taken
 *                        as rowIdx.dims[0] - 1
 * @param rhs             right-hand side; N is taken as rhs.dims[0]
 * @param alpha, beta     scalars forwarded to the kernel
 */
void csrmm_nt(Param out, const Param &values, const Param &rowIdx,
              const Param &colIdx, const Param &rhs, const T alpha,
              const T beta) {
    bool use_alpha = (alpha != scalar<T>(1.0));
    bool use_beta  = (beta != scalar<T>(0.0));

    // Using greedy indexing is causing performance issues on many platforms
    // FIXME: Figure out why
    bool use_greedy = false;

    // Cache key: kernel name, element type and the three variant flags.
    std::string ref_name = std::string("csrmm_nt_");
    ref_name += std::string(dtype_traits<T>::getName());
    ref_name += std::string("_") + std::to_string(use_alpha);
    ref_name += std::string("_") + std::to_string(use_beta);
    ref_name += std::string("_") + std::to_string(use_greedy);

    int device       = getActiveDeviceId();
    kc_entry_t entry = kernelCache(device, ref_name);

    const bool cacheMiss = (entry.prog == 0 && entry.ker == 0);
    if (cacheMiss) {
        const bool isDouble  = std::is_same<T, double>::value || std::is_same<T, cdouble>::value;
        const bool isComplex = std::is_same<T, cfloat>::value || std::is_same<T, cdouble>::value;

        std::ostringstream options;
        options << " -D T=" << dtype_traits<T>::getName();
        options << " -D USE_ALPHA=" << use_alpha;
        options << " -D USE_BETA=" << use_beta;
        options << " -D USE_GREEDY=" << use_greedy;
        options << " -D THREADS_PER_GROUP=" << THREADS_PER_GROUP;

        if (isDouble) {
            options << " -D USE_DOUBLE";
        }
        if (isComplex) {
            options << " -D IS_CPLX=1";
        } else {
            options << " -D IS_CPLX=0";
        }

        const char *ker_strs[] = {csrmm_cl};
        const int ker_lens[]   = {csrmm_cl_len};

        Program prog;
        buildProgram(prog, 1, ker_strs, ker_lens, options.str());
        entry.prog   = new Program(prog);
        entry.ker    = new Kernel[2];
        entry.ker[0] = Kernel(*entry.prog, "csrmm_nt");
        // FIXME: Change this after adding another kernel
        entry.ker[1] = Kernel(*entry.prog, "csrmm_nt");

        addKernelToCache(device, ref_name, entry);
    }

    auto csrmm_nt_kernel = entry.ker[0];
    auto csrmm_nt_func =
        KernelFunctor<Buffer, Buffer, Buffer, Buffer, int, int, Buffer, KParam,
                      T, T, Buffer>(csrmm_nt_kernel);

    int M = rowIdx.info.dims[0] - 1;
    int N = rhs.info.dims[0];

    // x covers N in groups of THREADS_PER_GROUP; y covers M in chunks of
    // REPEAT, capped at MAX_CSRMM_GROUPS.
    NDRange local(THREADS_PER_GROUP, 1);
    int groups_x = divup(N, local[0]);
    int groups_y = divup(M, REPEAT);
    groups_y     = std::min(groups_y, MAX_CSRMM_GROUPS);
    NDRange global(local[0] * groups_x, local[1] * groups_y);

    // One zero-initialised counter per x-group, uploaded before the launch.
    std::vector<int> zeros(groups_x);
    cl::Buffer *counter = bufferAlloc(zeros.size() * sizeof(int));
    getQueue().enqueueWriteBuffer(
        *counter, CL_TRUE, 0, zeros.size() * sizeof(int), (void *)zeros.data());

    csrmm_nt_func(EnqueueArgs(getQueue(), global, local), *out.data,
                  *values.data, *rowIdx.data, *colIdx.data, M, N, *rhs.data,
                  rhs.info, alpha, beta, *counter);

    bufferFree(counter);
}
Esempio n. 17
0
/**
 * Host-side driver for the three kernels built from fast_cl:
 * "locate_features", "non_max_counts" and "get_features".
 *
 * @param out_feat       receives the number of features returned
 * @param x_out, y_out, score_out  outputs; their data buffers are allocated
 *                       here only when at least one feature is found, their
 *                       info is always set to a 1-D shape of that length
 * @param in             input image (dims[0] x dims[1] are used)
 * @param thr            threshold forwarded to locate_features
 * @param feature_ratio  caps the feature count at
 *                       ceil(dims[0] * dims[1] * feature_ratio)
 * @param edge           border width forwarded to the kernels
 *
 * NOTE(review): the temporary buffers are released only on the normal path at
 * the end of the try block; an exception thrown earlier appears to skip the
 * bufferFree calls - confirm whether that is acceptable.
 */
void fast(unsigned* out_feat,
          Param &x_out,
          Param &y_out,
          Param &score_out,
          Param in,
          const float thr,
          const float feature_ratio,
          const unsigned edge)
{
    try {
        // Programs/kernels are built once per device for this template
        // instantiation and then kept in these function-local maps.
        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*> fastProgs;
        static std::map<int, Kernel*>  lfKernel;
        static std::map<int, Kernel*>  nmKernel;
        static std::map<int, Kernel*>  gfKernel;

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {

                // Template parameters are baked into the program as -D options.
                std::ostringstream options;
                options << " -D T=" << dtype_traits<T>::getName()
                        << " -D ARC_LENGTH=" << arc_length
                        << " -D NONMAX=" << static_cast<unsigned>(nonmax);

                if (std::is_same<T, double>::value ||
                    std::is_same<T, cdouble>::value) {
                    options << " -D USE_DOUBLE";
                }

                cl::Program prog;
                buildProgram(prog, fast_cl, fast_cl_len, options.str());
                fastProgs[device] = new Program(prog);

                lfKernel[device] = new Kernel(*fastProgs[device], "locate_features");
                nmKernel[device] = new Kernel(*fastProgs[device], "non_max_counts");
                gfKernel[device] = new Kernel(*fastProgs[device], "get_features");
            });

        // Upper bound on the number of features that will be returned.
        const unsigned max_feat = ceil(in.info.dims[0] * in.info.dims[1] * feature_ratio);

        // Matrix containing scores for detected features, scores are stored in the
        // same coordinates as features, dimensions should be equal to in.
        // It is zero-filled from a host-side vector before the first kernel runs.
        cl::Buffer *d_score = bufferAlloc(in.info.dims[0] * in.info.dims[1] * sizeof(float));
        std::vector<float> score_init(in.info.dims[0] * in.info.dims[1], (float)0);
        getQueue().enqueueWriteBuffer(*d_score, CL_TRUE, 0, in.info.dims[0] * in.info.dims[1] * sizeof(float), &score_init[0]);

        // d_flags is a separate buffer only when nonmax is enabled; otherwise
        // it aliases d_score (which is why it is freed conditionally below).
        cl::Buffer *d_flags = d_score;
        if (nonmax) {
            d_flags = bufferAlloc(in.info.dims[0] * in.info.dims[1] * sizeof(T));
        }

        // Work-groups needed to cover the image without its edge border.
        const int blk_x = divup(in.info.dims[0]-edge*2, FAST_THREADS_X);
        const int blk_y = divup(in.info.dims[1]-edge*2, FAST_THREADS_Y);

        // Locate features kernel sizes
        const NDRange local(FAST_THREADS_X, FAST_THREADS_Y);
        const NDRange global(blk_x * FAST_THREADS_X, blk_y * FAST_THREADS_Y);

        auto lfOp = make_kernel<Buffer, KParam,
                                Buffer, const float, const unsigned,
                                LocalSpaceArg> (*lfKernel[device]);

        // Local memory: the work-group tile extended by 6 elements in each
        // dimension.
        lfOp(EnqueueArgs(getQueue(), global, local),
             *in.data, in.info, *d_score, thr, edge,
             cl::Local((FAST_THREADS_X + 6) * (FAST_THREADS_Y + 6) * sizeof(T)));
        CL_DEBUG_FINISH(getQueue());

        // The second and third kernels use a different launch geometry: one
        // work-group per 64x64 region of the image.
        const int blk_nonmax_x = divup(in.info.dims[0], 64);
        const int blk_nonmax_y = divup(in.info.dims[1], 64);

        // Nonmax kernel sizes
        const NDRange local_nonmax(FAST_THREADS_NONMAX_X, FAST_THREADS_NONMAX_Y);
        const NDRange global_nonmax(blk_nonmax_x * FAST_THREADS_NONMAX_X, blk_nonmax_y * FAST_THREADS_NONMAX_Y);

        // Device-side total, initialised to zero and read back after the
        // non_max_counts kernel.
        unsigned count_init = 0;
        cl::Buffer *d_total = bufferAlloc(sizeof(unsigned));
        getQueue().enqueueWriteBuffer(*d_total, CL_TRUE, 0, sizeof(unsigned), &count_init);

        // One unsigned per work-item of the nonmax launch, for both the
        // counts and the offsets buffers.
        //size_t *global_nonmax_dims = global_nonmax();
        size_t blocks_sz = blk_nonmax_x * FAST_THREADS_NONMAX_X * blk_nonmax_y * FAST_THREADS_NONMAX_Y * sizeof(unsigned);
        cl::Buffer *d_counts  = bufferAlloc(blocks_sz);
        cl::Buffer *d_offsets = bufferAlloc(blocks_sz);

        auto nmOp = make_kernel<Buffer, Buffer, Buffer,
                                Buffer, Buffer,
                                KParam, const unsigned> (*nmKernel[device]);
        nmOp(EnqueueArgs(getQueue(), global_nonmax, local_nonmax),
                         *d_counts, *d_offsets, *d_total, *d_flags, *d_score, in.info, edge);
        CL_DEBUG_FINISH(getQueue());

        // Read the total back and clamp it to max_feat.
        unsigned total;
        getQueue().enqueueReadBuffer(*d_total, CL_TRUE, 0, sizeof(unsigned), &total);
        total = total < max_feat ? total : max_feat;

        // Output buffers exist only when there is something to return; each
        // holds `total` floats.
        if (total > 0) {
            size_t out_sz = total * sizeof(float);
            x_out.data = bufferAlloc(out_sz);
            y_out.data = bufferAlloc(out_sz);
            score_out.data = bufferAlloc(out_sz);

            auto gfOp = make_kernel<Buffer, Buffer, Buffer,
                                    Buffer, Buffer, Buffer,
                                    KParam, const unsigned,
                                    const unsigned> (*gfKernel[device]);
            gfOp(EnqueueArgs(getQueue(), global_nonmax, local_nonmax),
                             *x_out.data, *y_out.data, *score_out.data,
                             *d_flags, *d_counts, *d_offsets,
                             in.info, total, edge);
            CL_DEBUG_FINISH(getQueue());
        }

        *out_feat = total;

        // Describe all three outputs as packed 1-D arrays of length total
        // (set even when total is 0 and no data buffer was allocated).
        x_out.info.dims[0] = total;
        x_out.info.strides[0] = 1;
        y_out.info.dims[0] = total;
        y_out.info.strides[0] = 1;
        score_out.info.dims[0] = total;
        score_out.info.strides[0] = 1;

        for (int k = 1; k < 4; k++) {
            x_out.info.dims[k] = 1;
            x_out.info.strides[k] = total;
            y_out.info.dims[k] = 1;
            y_out.info.strides[k] = total;
            score_out.info.dims[k] = 1;
            score_out.info.strides[k] = total;
        }

        // Release temporaries; d_flags only when it is not an alias of d_score.
        bufferFree(d_score);
        if (nonmax) bufferFree(d_flags);
        bufferFree(d_total);
        bufferFree(d_counts);
        bufferFree(d_offsets);
    } catch (cl::Error err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Esempio n. 18
0
 // Releases memory obtained for element type T: the pointer is reinterpreted
 // as the cl::Buffer handle it stands for and handed to bufferFree().
 void memFree(T *ptr)
 {
     cl::Buffer *handle = (cl::Buffer *)ptr;
     bufferFree(handle);
 }
Esempio n. 19
0
/**
 * Host-side launcher for the "morph" kernel built from morph_cl.
 *
 * The program is compiled once per device (guarded by std::call_once) with
 * the element type, dilation flag, initial value and window length baked in
 * as -D options. The mask is copied into a temporary buffer, the kernel is
 * enqueued, and the temporary buffer is released, including when an enqueue
 * throws.
 *
 * @param out   destination array
 * @param in    input array; dims[2] and dims[3] are folded into the x and y
 *              ranges as batches
 * @param mask  structuring element; sizeof(T)*windLen*windLen bytes are copied
 * @throws whatever CL_TO_AF_ERROR raises for a cl::Error (otherwise the
 *         cl::Error is rethrown)
 */
void morph(Param         out,
        const Param      in,
        const Param      mask)
{
    try {
        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*> morProgs;
        static std::map<int, Kernel*> morKernels;

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {
                ToNumStr<T> toNumStr;
                // Starting value of the reduction: init of max for dilation,
                // init of min otherwise.
                T init = isDilation ? Binary<T, af_max_t>().init() : Binary<T, af_min_t>().init();
                std::ostringstream options;
                options << " -D T=" << dtype_traits<T>::getName()
                        << " -D isDilation="<< isDilation
                        << " -D init=" << toNumStr(init)
                        << " -D windLen=" << windLen;
                if (std::is_same<T, double>::value ||
                    std::is_same<T, cdouble>::value) {
                    options << " -D USE_DOUBLE";
                }
                Program prog;
                buildProgram(prog, morph_cl, morph_cl_len, options.str());
                morProgs[device]   = new Program(prog);
                morKernels[device] = new Kernel(*morProgs[device], "morph");
            });

        auto morphOp = KernelFunctor<Buffer, KParam,
                                   Buffer, KParam,
                                   Buffer, cl::LocalSpaceArg,
                                   int, int
                                  >(*morKernels[device]);

        NDRange local(THREADS_X, THREADS_Y);

        int blk_x = divup(in.info.dims[0], THREADS_X);
        int blk_y = divup(in.info.dims[1], THREADS_Y);
        // launch batch * blk_x blocks along x dimension
        NDRange global(blk_x * THREADS_X * in.info.dims[2],
                       blk_y * THREADS_Y * in.info.dims[3]);

        // calculate shared memory size: the work-group tile plus a halo of
        // windLen/2 on every side, with one extra element on the x extent
        const int halo    = windLen/2;
        const int padding = 2*halo;
        const int locLen  = THREADS_X + padding + 1;
        const int locSize = locLen * (THREADS_Y+padding);

        // copy mask/filter to constant memory
        cl_int se_size   = sizeof(T)*windLen*windLen;
        cl::Buffer *mBuff = bufferAlloc(se_size);
        try {
            getQueue().enqueueCopyBuffer(*mask.data, *mBuff, 0, 0, se_size);

            morphOp(EnqueueArgs(getQueue(), global, local),
                    *out.data, out.info, *in.data, in.info, *mBuff,
                    cl::Local(locSize*sizeof(T)), blk_x, blk_y);
        } catch (...) {
            // Do not leak the staging buffer when an enqueue fails.
            bufferFree(mBuff);
            throw;
        }

        bufferFree(mBuff);

        CL_DEBUG_FINISH(getQueue());
    } catch (const cl::Error &err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Esempio n. 20
0
/**
 * Brute-force nearest-neighbour matching of query feature vectors against
 * training feature vectors on the active OpenCL device.
 *
 * The work is done in two kernel passes:
 *   1. "nearest_neighbour_unroll" (when nextpow2(feat_len) == feat_len) or
 *      "nearest_neighbour" (otherwise) writes one candidate index/distance per
 *      work-group and query into temporary buffers.
 *   2. "select_matches" reduces those per-group candidates into idx / dist.
 *
 * Compiled programs are cached per device in kernelCaches, keyed on the
 * distance type, local-memory usage, element type and unroll length.
 *
 * Relies on T, To, dist_type and THREADS being provided by the enclosing
 * scope (presumably template parameters / file-level constants).
 *
 * @param idx      receives the final match indices (written by select_matches)
 * @param dist     receives the final match distances (written by select_matches)
 * @param query    query feature vectors
 * @param train    training feature vectors
 * @param dist_dim dimension along which a single feature vector is laid out;
 *                 samples are taken from dimension 1 when dist_dim is 0 and
 *                 from dimension 0 otherwise
 * @param n_dist   not read by this function
 *
 * cl::Error exceptions are converted through CL_TO_AF_ERROR; temporary device
 * buffers are released on both the normal and the error path.
 */
void nearest_neighbour(Param idx,
                       Param dist,
                       Param query,
                       Param train,
                       const dim_t dist_dim,
                       const unsigned n_dist)
{
    // Declared outside the try block so the handler can release them if an
    // OpenCL call throws after they have been allocated.
    cl::Buffer *d_blk_idx  = nullptr;
    cl::Buffer *d_blk_dist = nullptr;

    try {
        const unsigned feat_len = query.info.dims[dist_dim];
        const To max_dist = maxval<To>();

        // Determine maximum feat_len capable of using shared memory (faster)
        cl_ulong avail_lmem = getDevice().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
        size_t lmem_predef = 2 * THREADS * sizeof(unsigned) + feat_len * sizeof(T);
        size_t ltrain_sz = THREADS * feat_len * sizeof(T);
        bool use_lmem = (avail_lmem >= (lmem_predef + ltrain_sz));
        size_t lmem_sz = (use_lmem) ? lmem_predef + ltrain_sz : lmem_predef;

        // A non-zero unroll_len selects the unrolled kernel; it is only kept
        // when nextpow2() leaves feat_len unchanged.
        unsigned unroll_len = nextpow2(feat_len);
        if (unroll_len != feat_len) unroll_len = 0;

        // Cache key: every value that ends up in the build options below
        std::string ref_name =
            std::string("knn_") +
            std::to_string(dist_type) +
            std::string("_") +
            std::to_string(use_lmem) +
            std::string("_") +
            std::string(dtype_traits<T>::getName()) +
            std::string("_") +
            std::to_string(unroll_len);

        int device = getActiveDeviceId();
        kc_t::iterator cache_idx = kernelCaches[device].find(ref_name);

        kc_entry_t entry;
        if (cache_idx == kernelCaches[device].end()) {

                std::ostringstream options;
                options << " -D T=" << dtype_traits<T>::getName()
                        << " -D To=" << dtype_traits<To>::getName()
                        << " -D THREADS=" << THREADS
                        << " -D FEAT_LEN=" << unroll_len;

                switch(dist_type) {
                    case AF_SAD: options <<" -D DISTOP=_sad_"; break;
                    case AF_SSD: options <<" -D DISTOP=_ssd_"; break;
                    case AF_SHD: options <<" -D DISTOP=_shd_ -D __SHD__";
                                 break;
                    default: break;
                }

                if (std::is_same<T, double>::value ||
                    std::is_same<T, cdouble>::value) {
                    options << " -D USE_DOUBLE";
                }

                if (use_lmem)
                    options << " -D USE_LOCAL_MEM";

                cl::Program prog;
                buildProgram(prog,
                             nearest_neighbour_cl,
                             nearest_neighbour_cl_len,
                             options.str());

                // Ownership of prog/ker is handed to the kernel cache
                entry.prog = new Program(prog);
                entry.ker = new Kernel[3];

                entry.ker[0] = Kernel(*entry.prog, "nearest_neighbour_unroll");
                entry.ker[1] = Kernel(*entry.prog, "nearest_neighbour");
                entry.ker[2] = Kernel(*entry.prog, "select_matches");

                kernelCaches[device][ref_name] = entry;
        } else {
            entry = cache_idx->second;
        }

        const dim_t sample_dim = (dist_dim == 0) ? 1 : 0;

        const unsigned nquery = query.info.dims[sample_dim];
        const unsigned ntrain = train.info.dims[sample_dim];

        unsigned nblk = divup(ntrain, THREADS);
        const NDRange local(THREADS, 1);
        const NDRange global(nblk * THREADS, 1);

        // One candidate per (work-group, query) pair. The element count is
        // computed in size_t so that nblk * nquery cannot wrap in 32 bits.
        const size_t blk_elems = static_cast<size_t>(nblk) * nquery;
        d_blk_idx  = bufferAlloc(blk_elems * sizeof(unsigned));
        d_blk_dist = bufferAlloc(blk_elems * sizeof(To));

        // For each query vector, find the training vector with the smallest
        // distance (as selected by dist_type) per work-group
        if (unroll_len > 0) {
            auto huOp = KernelFunctor<Buffer, Buffer,
                                    Buffer, KParam,
                                    Buffer, KParam,
                                    const To,
                                    LocalSpaceArg> (entry.ker[0]);

            huOp(EnqueueArgs(getQueue(), global, local),
                 *d_blk_idx, *d_blk_dist,
                 *query.data, query.info, *train.data, train.info,
                 max_dist, cl::Local(lmem_sz));
        }
        else {
            auto hmOp = KernelFunctor<Buffer, Buffer,
                                    Buffer, KParam,
                                    Buffer, KParam,
                                    const To, const unsigned,
                                    LocalSpaceArg> (entry.ker[1]);

            hmOp(EnqueueArgs(getQueue(), global, local),
                 *d_blk_idx, *d_blk_dist,
                 *query.data, query.info, *train.data, train.info,
                 max_dist, feat_len, cl::Local(lmem_sz));
        }
        CL_DEBUG_FINISH(getQueue());

        const NDRange local_sm(32, 8);
        const NDRange global_sm(divup(nquery, 32) * 32, 8);

        // Reduce the smallest distances from each work-group and store the
        // final best match
        auto smOp = KernelFunctor<Buffer, Buffer, Buffer, Buffer,
                                const unsigned, const unsigned,
                                const To> (entry.ker[2]);

        smOp(EnqueueArgs(getQueue(), global_sm, local_sm),
             *idx.data, *dist.data,
             *d_blk_idx, *d_blk_dist,
             nquery, nblk, max_dist);
        CL_DEBUG_FINISH(getQueue());

        bufferFree(d_blk_idx);
        d_blk_idx = nullptr;
        bufferFree(d_blk_dist);
        d_blk_dist = nullptr;
    } catch (const cl::Error &err) {
        // Do not leak the scratch buffers when a build/enqueue call fails
        if (d_blk_idx != nullptr) bufferFree(d_blk_idx);
        if (d_blk_dist != nullptr) bufferFree(d_blk_dist);
        CL_TO_AF_ERROR(err);
        throw;
    }
}
 /*
  * configure supplicant & update driver according to new mode of operation
  *
  * h_wpa_s - opaque handle, used here as a struct wpa_supplicant *
  * h_ssid  - opaque handle, used here as a struct wpa_ssid *
  * WscMode - requested Simple Config mode (WSC_MODE_PIN or WSC_MODE_PBC)
  *
  * Re-initialises the global WscSupplicantConfig with the compile-time
  * defaults, selects the device password for the requested mode, pushes a
  * freshly built probe request to the driver and rewrites the ssid's
  * security settings for the enrollee association.  Leaves the state machine
  * in WSC_STATE_ASSOC, except when called in WSC_STATE_SUCCESS, in which case
  * it only resets the state to WSC_STATE_IDLE and returns.
  */
void wsc_supplicant_associate(void* h_wpa_s, void* h_ssid, u32 WscMode)
{
	int i;
	bufferObj probeReqBuf;
	struct wpa_supplicant *wpa_s = h_wpa_s;
	struct wpa_ssid *ssid = h_ssid;

	/*
	if the current state is WSC_STATE_SUCCESS this means that this is the second association
	in the WPS sequence and we are not supposed to start the EAP-WSC mechanism.
	*/
	if(WscSupplicantConfig.smState == WSC_STATE_SUCCESS)
	{
		WscSupplicantConfig.smState = WSC_STATE_IDLE;
		return;
	}

	/*
	if the current state is not WSC_STATE_IDLE this means that this is an interruption of
	the current WPS session.
	*/
	if(WscSupplicantConfig.smState != WSC_STATE_IDLE)
	{
		if(WscSupplicantConfig.WscMode == WSC_MODE_PBC)
		{
			/*
			If Supplicant is in a process of a Simple Config handshake and in PBC mode -
			Cancel registration to 2 Min. Walk-Time PushButton timeout
			*/
			eloop_cancel_timeout(wsc_supplicant_PushButtonWalktimeTimeout, NULL, NULL);
		}

		/* update driver */
		wpa_drv_set_wsc_mode(h_wpa_s,
					  		WSC_MODE_OFF,
					  		NULL,
					  		0);
	}

	wpa_printf(MSG_INFO,"wsc_supplicant: Entered wsc_supplicant_associate");

	/* init params */
	WscSupplicantConfig.version = DEF_CONFIG_VERSION;
	WscSupplicantConfig.configMethods = DEF_CONFIG_CONFIG_METHODS;
	{
		/* Parse DEF_CONFIG_UUID two hex digits at a time into uuidE[0..15] */
		char temp[10];
		const char *p = DEF_CONFIG_UUID;
		temp[0] = '0';
		temp[1] = 'x';
		/* strncpy() below copies exactly two characters and adds no
		   terminator, so terminate the "0xNN" string once up front */
		temp[4] = '\0';

		/* move past the '0x' on the first pass */
		for (i = 0; i <= 15; i++)
		{
			p += 2;
			strncpy(&temp[2], p, 2);
			WscSupplicantConfig.uuidE[i] = (u8) (strtoul(temp, NULL, 16));
		}
	}
	WscSupplicantConfig.primaryDeviceType.category_id = DEF_CONFIG_PRIMARY_DEV_CATEGORY;
	WscSupplicantConfig.primaryDeviceType.oui = DEF_CONFIG_PRIMARY_DEV_OUI;
	WscSupplicantConfig.primaryDeviceType.sub_category_id = DEF_CONFIG_PRIMARY_DEV_SUB_CATEGORY;
	WscSupplicantConfig.rfBand = DEF_CONFIG_RF_BAND;
	WscSupplicantConfig.devicePasswordId = WSC_DEVICEPWDID_DEFAULT; /* Default (PIN) */

	WscSupplicantConfig.authenticationTypeFlags = DEF_CONFIG_AUTH_TYPE_FLAGS;
	memcpy(WscSupplicantConfig.macAddress, wpa_s->own_addr, ETH_ALEN);
	WscSupplicantConfig.encryptionTypeFlags = DEF_CONFIG_ENCR_TYPE_FLAGS;
	WscSupplicantConfig.connectionTypeFlags = DEF_CONFIG_CONN_TYPE_FLAGS;
	WscSupplicantConfig.state = 0;
	WscSupplicantConfig.assocState = 0;
	WscSupplicantConfig.configError = 0;
	/* NOTE(review): destination sizes are not visible here; these copies are unbounded */
	sprintf(WscSupplicantConfig.manufacturer, "%s", DEF_CONFIG_MANUFACTURER);
	sprintf(WscSupplicantConfig.modelName, "%s", DEF_CONFIG_MODEL_NAME);
	sprintf(WscSupplicantConfig.modelNumber, "%s", DEF_CONFIG_MODEL_NUMBER);
	sprintf(WscSupplicantConfig.serialNumber, "%s", DEF_CONFIG_SERIAL_NUMBER);
	sprintf(WscSupplicantConfig.deviceName, "%s", DEF_CONFIG_DEVICE_NAME);
	WscSupplicantConfig.osVersion = DEF_CONFIG_OS_VERSION;

	WscSupplicantConfig.ssid = ssid;
	WscSupplicantConfig.wpa_s = wpa_s;


	switch (WscMode)
	{
		case WSC_MODE_PIN:
			if(!ssid->wsc_pin)
			{
				/* No PIN configured: generate a random 7 digit value + 1 checksum digit */
				char c_devPwd[32];
				u8 devPwd[10];
				u32 rnd;
				u32 val;
				u32 checksum;

				/* NOTE(review): the return value of RAND_bytes() is not checked */
				RAND_bytes(devPwd, LONG_PIN_LEN);
				/* copy the bytes out instead of casting devPwd to u32 *
				   (avoids an unaligned / aliasing access) */
				memcpy(&rnd, devPwd, sizeof(rnd));
				sprintf(c_devPwd, "%08u", (unsigned int)rnd);

				/* Compute the checksum */
				c_devPwd[7] = '\0';	/* keep the first 7 digits only */
				val = strtoul(c_devPwd, NULL, 10 );
				checksum = wsc_supplicant_ComputeChecksum( val );
				val = val*10 + checksum;
				/* zero-pad so the PIN always has 8 digits, even when the
				   random value starts with zeros */
				sprintf((char *)(WscSupplicantConfig.password), "%08u", (unsigned int)val );
				WscSupplicantConfig.password[LONG_PIN_LEN] = '\0';
				wpa_printf(MSG_INFO, "Random PIN: %c-%c-%c-%c-%c-%c-%c-%c\n",
					WscSupplicantConfig.password[0],
					WscSupplicantConfig.password[1],
					WscSupplicantConfig.password[2],
					WscSupplicantConfig.password[3],
					WscSupplicantConfig.password[4],
					WscSupplicantConfig.password[5],
					WscSupplicantConfig.password[6],
					WscSupplicantConfig.password[7]);
			}
			else
			{
				/* NOTE(review): the size of password is not visible here;
				   this copy is not bounded by it */
				strcpy((char *)(WscSupplicantConfig.password), ssid->wsc_pin);
			}
			break;

		case WSC_MODE_PBC:

			/* PBC uses an all-'0' device password */
			for (i = 0 ; i < LONG_PIN_LEN; i++)
			{
				WscSupplicantConfig.password[i] = '0';
			}
			WscSupplicantConfig.password[LONG_PIN_LEN] = '\0';

			WscSupplicantConfig.configMethods |= WSC_CONFMET_PBC;
			WscSupplicantConfig.devicePasswordId = WSC_DEVICEPWDID_PUSH_BTN;

			/* Register for 2 Min. Walk-Time PushButton timeout */
			eloop_register_timeout(120, 0, wsc_supplicant_PushButtonWalktimeTimeout, NULL, NULL);

			break;

		default:

			wpa_printf(MSG_ERROR,"wsc_supplicant: wsc_supplicant_associate: ERROR: Incompatible Simple Config Mode received in scStartEnrollee: (%d)", WscMode);
			/* NOTE(review): execution continues below, so this IDLE state is
			   overwritten with WSC_STATE_ASSOC at the end of the function -
			   confirm whether an early return was intended here */
			WscSupplicantConfig.smState = WSC_STATE_IDLE;
	}

	/* Build the probe request and hand it to the driver together with the new mode */
	bufferCreateChunk(&probeReqBuf);
	wsc_supplicant_BuildProbeRequest(&WscSupplicantConfig, &probeReqBuf);

	wpa_drv_set_wsc_mode(h_wpa_s,
			  				WscMode,
			  				bufferGetBuf(&probeReqBuf),
			  				bufferLength(&probeReqBuf));

	bufferFree(&probeReqBuf);

	/* Force the network block to the settings used for the enrollee association */
	ssid->key_mgmt = WPA_KEY_MGMT_IEEE8021X;
	ssid->auth_alg = WPA_AUTH_ALG_OPEN;
	ssid->proto = WPA_PROTO_WPA;
	ssid->pairwise_cipher = WPA_CIPHER_TKIP;
	ssid->group_cipher = WPA_CIPHER_TKIP;
	/* NOTE(review): any previous ssid->identity is overwritten without being
	   freed - confirm ownership with the config code */
	ssid->identity = (u8 *) strdup(ENROLLEE_ID_STRING);
	ssid->identity_len = strlen(ENROLLEE_ID_STRING);

	WscSupplicantConfig.WscMode = WscMode;
	WscSupplicantConfig.smState = WSC_STATE_ASSOC;
}
Esempio n. 22
0
/*
 * audioBufferOpen - fork a child process that buffers audio data between a
 * pipe and the audio device.
 *
 * Two pipes are created: a data pipe and a control pipe.
 *
 * Parent: stores the write ends in buffer_fd / control_fd, closes the read
 * ends and returns the value of fork() (the child's pid; note that a failed
 * fork(), which returns -1, also takes this branch).
 *
 * Child: never returns to the caller.  It opens the audio device with
 * audioOpen(frequency, stereo, volume) and then loops on select():
 *   - data read from the data pipe is appended to a ring buffer,
 *   - AUSIZ bytes at a time are written from the ring buffer with audioWrite(),
 *   - any successful read on the control pipe resets the ring buffer and
 *     calls audioFlush().
 * The loop ends once the data pipe has reported end-of-file and fewer than
 * AUSIZ bytes remain buffered; the child then closes its descriptors and
 * calls exit(0).  Failures of select() or read() end the child via _exit(-1).
 * A failure to create either pipe terminates the calling process via exit(-1).
 */
int
audioBufferOpen(int frequency, int stereo, int volume)
{
	struct ringBuffer audioBuffer;
	
	int inFd,outFd,ctlFd,cnt,pid;
	int inputFinished=FALSE;
	int percentFull;		/* assigned below but never read in this function */
	fd_set inFdSet,outFdSet;
	fd_set *outFdPtr; 
	struct timeval timeout;		/* filled in each iteration but not passed to select() */
	int filedes[2];			/* data pipe: [0] read end, [1] write end */
	int controldes[2];		/* control pipe: [0] read end, [1] write end */
	
	
	if (pipe(filedes) || pipe(controldes)) 
	{
		perror("pipe");
		exit(-1);
	}
	if ((pid=fork())!=0) 
	{  
		/* if we are the parent */
		control_fd=controldes[1];
		close(filedes[0]);
		buffer_fd=filedes[1];
		close(controldes[0]);
		return(pid);	        /* return the pid */
	}
	
	
	/* we are the child */
	close(filedes[1]);
	inFd=filedes[0];
	close(controldes[1]);
	ctlFd=controldes[0];
	audioOpen(frequency,stereo,volume);
	outFd=getAudioFd();
	initBuffer(&audioBuffer);
	
	while(1) 
	{
		timeout.tv_sec=0;
		timeout.tv_usec=0;
		/* the control pipe is always watched; the data pipe is added further down */
		FD_ZERO(&inFdSet);
		FD_ZERO(&outFdSet);
		FD_SET(ctlFd,&inFdSet);
		FD_SET(outFd,&outFdSet);
		
		if (bufferSize(&audioBuffer)<AUSIZ) 
		{					/* is the buffer too empty */
			outFdPtr = NULL;		/* yes, don't try to write */
			if (inputFinished)		/* no more input, buffer exhausted -> exit */
				break;
		} else
			outFdPtr=&outFdSet;															/* no, select on write */
		
		/* check we have at least AUSIZ bytes left (don't want <1k bits) */
		if ((bufferFree(&audioBuffer)>=AUSIZ) && !inputFinished)
			FD_SET(inFd,&inFdSet);

/* The following selects() are basically all that is left of the system
   dependent code outside the audioIO_*&c files. These selects really
   need to be moved into the audioIO_*.c files and replaced with a
   function like audioIOReady(inFd, &checkIn, &checkAudio, wait) where
   it checks the status of the input or audio output if checkIn or
   checkAudio are set and returns with checkIn or checkAudio set to TRUE
   or FALSE depending on whether or not data is available. If wait is
   FALSE the function should return immediately, if wait is TRUE the
   process should BLOCK until the required condition is met. NB: The
   process MUST relinquish the CPU during this check or it will gobble
   up all the available CPU which sort of defeats the purpose of the
   buffer.

   This is tricky for people who don't have file descriptors (and
   select) to do the job. In that case a buffer implemented using
   threads should work. The way things are set up now a threaded version
   shouldn't be too hard to implement. When I get some time... */

		/* check if we can read or write (NULL timeout: blocks until a descriptor is ready) */
		if (select(MAX3(inFd,outFd,ctlFd)+1,&inFdSet,outFdPtr,NULL,NULL) > -1) 
		{
			if (outFdPtr && FD_ISSET(outFd,outFdPtr)) 
			{							/* need to write */
				int bytesToEnd = AUDIO_BUFFER_SIZE - audioBuffer.outPos;

				percentFull=100*bufferSize(&audioBuffer)/AUDIO_BUFFER_SIZE;
				if (AUSIZ>bytesToEnd) 
				{
					/* the AUSIZ chunk wraps: write the tail of the buffer, then the rest from its start */
					cnt = audioWrite(audioBuffer.bufferPtr + audioBuffer.outPos, bytesToEnd);
					cnt += audioWrite(audioBuffer.bufferPtr, AUSIZ - bytesToEnd);
					audioBuffer.outPos = AUSIZ - bytesToEnd;
				} 
				else 
				{
					cnt = audioWrite(audioBuffer.bufferPtr + audioBuffer.outPos, AUSIZ);
					audioBuffer.outPos += AUSIZ;
				}

			}
			if (FD_ISSET(inFd,&inFdSet)) 
			{								 /* need to read */
				/* read at most AUSIZ bytes and never past the end of the buffer */
			        cnt = read(inFd, audioBuffer.bufferPtr + audioBuffer.inPos, MIN(AUSIZ, AUDIO_BUFFER_SIZE - audioBuffer.inPos));
				if (cnt >= 0) 
				{
					audioBuffer.inPos = (audioBuffer.inPos + cnt) % AUDIO_BUFFER_SIZE;

					if (cnt==0)		/* end of file on the data pipe */
						inputFinished=TRUE;
				} 
				else 
					_exit(-1);
			}
			if (FD_ISSET(ctlFd,&inFdSet)) 
			{
				int dummy;

				/* the value read is ignored; any successful read discards the buffered data */
			        cnt = read(ctlFd, &dummy, sizeof dummy);
				if (cnt >= 0) 
				{
					audioBuffer.inPos = audioBuffer.outPos = 0;
					audioFlush();
				} 
				else 
					_exit(-1);
			}
		} 
		else 
			_exit(-1);
	}
	close(inFd);
	audioClose();
	exit(0);
	return 0; /* just to get rid of warnings */
}
Esempio n. 23
0
        /**
         * Launches the CSR sparse-matrix / dense-vector kernel on the active
         * OpenCL device.
         *
         * @param out     buffer the kernel writes its result to
         * @param values  non-zero values of the sparse matrix
         * @param rowIdx  CSR row index array; its first dimension minus one is
         *                passed to the kernel as the row count M
         * @param colIdx  CSR column indices
         * @param rhs     dense right-hand side
         * @param alpha   scalar forwarded to the kernel; USE_ALPHA is set in the
         *                build options when it differs from 1
         * @param beta    scalar forwarded to the kernel; USE_BETA is set in the
         *                build options when it differs from 0
         *
         * The compiled program is cached per device under a key made of the
         * element type and every build-time switch.
         */
        void csrmv(Param out,
                   const Param &values, const Param &rowIdx, const Param &colIdx,
                   const Param &rhs, const T alpha, const T beta)
        {
            // Build-time switches
            const bool use_alpha = (alpha != scalar<T>(1.0));
            const bool use_beta  = (beta != scalar<T>(0.0));

            // Using greedy indexing is causing performance issues on many platforms
            // FIXME: Figure out why
            const bool use_greedy = false;

            // FIXME: Find a better number based on average non zeros per row
            int threads = 64;

            // Cache key: "csrmv_<type>_<alpha>_<beta>_<greedy>_<threads>"
            std::string ref_name("csrmv_");
            ref_name += std::string(dtype_traits<T>::getName());
            ref_name += "_";
            ref_name += std::to_string(use_alpha);
            ref_name += "_";
            ref_name += std::to_string(use_beta);
            ref_name += "_";
            ref_name += std::to_string(use_greedy);
            ref_name += "_";
            ref_name += std::to_string(threads);

            const int device = getActiveDeviceId();
            kc_entry_t entry = kernelCache(device, ref_name);

            const bool not_cached = (entry.prog == 0 && entry.ker == 0);
            if (not_cached) {
                const bool is_double = std::is_same<T, double>::value ||
                                       std::is_same<T, cdouble>::value;
                const bool is_cplx   = std::is_same<T, cfloat>::value ||
                                       std::is_same<T, cdouble>::value;

                std::ostringstream options;
                options << " -D T=" << dtype_traits<T>::getName()
                        << " -D USE_ALPHA=" << use_alpha
                        << " -D USE_BETA=" << use_beta
                        << " -D USE_GREEDY=" << use_greedy
                        << " -D THREADS=" << threads;
                if (is_double) {
                    options << " -D USE_DOUBLE";
                }
                options << (is_cplx ? " -D IS_CPLX=1" : " -D IS_CPLX=0");

                const char *ker_strs[] = {csrmv_cl};
                const int   ker_lens[] = {csrmv_cl_len};

                Program prog;
                buildProgram(prog, 1, ker_strs, ker_lens, options.str());

                // [0] = one-thread-per-row variant, [1] = block variant
                // (going by the kernel names only)
                entry.prog   = new Program(prog);
                entry.ker    = new Kernel[2];
                entry.ker[0] = Kernel(*entry.prog, "csrmv_thread");
                entry.ker[1] = Kernel(*entry.prog, "csrmv_block");

                addKernelToCache(device, ref_name, entry);
            }

            // Zero-initialised device-side integer handed to the kernel
            int zero = 0;
            cl::Buffer *counter = bufferAlloc(sizeof(int));
            getQueue().enqueueWriteBuffer(*counter, CL_TRUE, 0, sizeof(int),
                                          (void *)&zero);

            // TODO: Figure out the proper way to choose either csrmv_thread or csrmv_block
            const bool is_csrmv_block = true;
            const int  ker_id = is_csrmv_block ? 1 : 0;
            auto csrmv_kernel = entry.ker[ker_id];
            auto csrmv_func = KernelFunctor<Buffer,
                                            Buffer, Buffer, Buffer,
                                            int,
                                            Buffer, KParam, T, T, Buffer>(csrmv_kernel);

            // Launch geometry
            const int M = rowIdx.info.dims[0] - 1;
            NDRange local(is_csrmv_block ? threads : THREADS_PER_GROUP, 1);

            int groups_x = is_csrmv_block ? divup(M, REPEAT) : divup(M, REPEAT * local[0]);
            groups_x = std::min(groups_x, MAX_CSRMV_GROUPS);
            NDRange global(local[0] * groups_x, 1);

            csrmv_func(EnqueueArgs(getQueue(), global, local),
                        *out.data, *values.data, *rowIdx.data, *colIdx.data,
                        M, *rhs.data, rhs.info, alpha, beta, *counter);

            CL_DEBUG_FINISH(getQueue());
            bufferFree(counter);
        }
Esempio n. 24
0
void orb(unsigned* out_feat,
         Param& x_out,
         Param& y_out,
         Param& score_out,
         Param& ori_out,
         Param& size_out,
         Param& desc_out,
         Param image,
         const float fast_thr,
         const unsigned max_feat,
         const float scl_fctr,
         const unsigned levels,
         const bool blur_img)
{
    try {
        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static Program            orbProgs[DeviceManager::MAX_DEVICES];
        static Kernel             hrKernel[DeviceManager::MAX_DEVICES];
        static Kernel             kfKernel[DeviceManager::MAX_DEVICES];
        static Kernel             caKernel[DeviceManager::MAX_DEVICES];
        static Kernel             eoKernel[DeviceManager::MAX_DEVICES];

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {

                std::ostringstream options;
                options << " -D T=" << dtype_traits<T>::getName()
                        << " -D BLOCK_SIZE=" << ORB_THREADS_X;

                if (std::is_same<T, double>::value ||
                    std::is_same<T, cdouble>::value) {
                    options << " -D USE_DOUBLE";
                }

                buildProgram(orbProgs[device],
                             orb_cl,
                             orb_cl_len,
                             options.str());

                hrKernel[device] = Kernel(orbProgs[device], "harris_response");
                kfKernel[device] = Kernel(orbProgs[device], "keep_features");
                caKernel[device] = Kernel(orbProgs[device], "centroid_angle");
                eoKernel[device] = Kernel(orbProgs[device], "extract_orb");
            });

        unsigned patch_size = REF_PAT_SIZE;

        unsigned min_side = std::min(image.info.dims[0], image.info.dims[1]);
        unsigned max_levels = 0;
        float scl_sum = 0.f;
        for (unsigned i = 0; i < levels; i++) {
            min_side /= scl_fctr;

            // Minimum image side for a descriptor to be computed
            if (min_side < patch_size || max_levels == levels) break;

            max_levels++;
            scl_sum += 1.f / (float)pow(scl_fctr,(float)i);
        }

        std::vector<cl::Buffer*> d_x_pyr(max_levels);
        std::vector<cl::Buffer*> d_y_pyr(max_levels);
        std::vector<cl::Buffer*> d_score_pyr(max_levels);
        std::vector<cl::Buffer*> d_ori_pyr(max_levels);
        std::vector<cl::Buffer*> d_size_pyr(max_levels);
        std::vector<cl::Buffer*> d_desc_pyr(max_levels);

        std::vector<unsigned> feat_pyr(max_levels);
        unsigned total_feat = 0;

        // Compute number of features to keep for each level
        std::vector<unsigned> lvl_best(max_levels);
        unsigned feat_sum = 0;
        for (unsigned i = 0; i < max_levels-1; i++) {
            float lvl_scl = (float)pow(scl_fctr,(float)i);
            lvl_best[i] = ceil((max_feat / scl_sum) / lvl_scl);
            feat_sum += lvl_best[i];
        }
        lvl_best[max_levels-1] = max_feat - feat_sum;

        // Maintain a reference to previous level image
        Param prev_img;
        Param lvl_img;

        const unsigned gauss_len = 9;
        T* h_gauss = nullptr;
        Param gauss_filter;
        gauss_filter.data = nullptr;

        for (unsigned i = 0; i < max_levels; i++) {
            const float lvl_scl = (float)pow(scl_fctr,(float)i);

            if (i == 0) {
                // First level is used in its original size
                lvl_img = image;

                prev_img = image;
            }
            else if (i > 0) {
                // Resize previous level image to current level dimensions
                lvl_img.info.dims[0] = round(image.info.dims[0] / lvl_scl);
                lvl_img.info.dims[1] = round(image.info.dims[1] / lvl_scl);

                lvl_img.info.strides[0] = 1;
                lvl_img.info.strides[1] = lvl_img.info.dims[0];

                for (int k = 2; k < 4; k++) {
                    lvl_img.info.dims[k] = 1;
                    lvl_img.info.strides[k] = lvl_img.info.dims[k - 1] * lvl_img.info.strides[k - 1];
                }

                lvl_img.info.offset = 0;
                lvl_img.data = bufferAlloc(lvl_img.info.dims[3] * lvl_img.info.strides[3] * sizeof(T));

                resize<T, AF_INTERP_BILINEAR>(lvl_img, prev_img);

                if (i > 1)
                   bufferFree(prev_img.data);
                prev_img = lvl_img;
            }

            unsigned lvl_feat = 0;
            Param d_x_feat, d_y_feat, d_score_feat;

            // Round feature size to nearest odd integer
            float size = 2.f * floor(patch_size / 2.f) + 1.f;

            // Avoid keeping features that might be too wide and might not fit on
            // the image, sqrt(2.f) is the radius when angle is 45 degrees and
            // represents widest case possible
            unsigned edge = ceil(size * sqrt(2.f) / 2.f);

            // Detect FAST features
            fast<T, 9, true>(&lvl_feat, d_x_feat, d_y_feat, d_score_feat,
                             lvl_img, fast_thr, 0.15f, edge);

            if (lvl_feat == 0) {
                feat_pyr[i] = 0;

                if (i > 0 && i == max_levels-1)
                    bufferFree(lvl_img.data);

                continue;
            }

            bufferFree(d_score_feat.data);

            unsigned usable_feat = 0;
            cl::Buffer* d_usable_feat = bufferAlloc(sizeof(unsigned));
            getQueue().enqueueWriteBuffer(*d_usable_feat, CL_TRUE, 0, sizeof(unsigned), &usable_feat);

            cl::Buffer* d_x_harris = bufferAlloc(lvl_feat * sizeof(float));
            cl::Buffer* d_y_harris = bufferAlloc(lvl_feat * sizeof(float));
            cl::Buffer* d_score_harris = bufferAlloc(lvl_feat * sizeof(float));

            // Calculate Harris responses
            // Good block_size >= 7 (must be an odd number)
            const dim_type blk_x = divup(lvl_feat, ORB_THREADS_X);
            const NDRange local(ORB_THREADS_X, ORB_THREADS_Y);
            const NDRange global(blk_x * ORB_THREADS_X, ORB_THREADS_Y);

            unsigned block_size = 7;
            float k_thr = 0.04f;

            auto hrOp = make_kernel<Buffer, Buffer, Buffer,
                                    Buffer, Buffer, const unsigned,
                                    Buffer, Buffer, KParam,
                                    const unsigned, const float, const unsigned> (hrKernel[device]);

            hrOp(EnqueueArgs(getQueue(), global, local),
                 *d_x_harris, *d_y_harris, *d_score_harris,
                 *d_x_feat.data, *d_y_feat.data, lvl_feat,
                 *d_usable_feat, *lvl_img.data, lvl_img.info,
                 block_size, k_thr, patch_size);
            CL_DEBUG_FINISH(getQueue());

            getQueue().enqueueReadBuffer(*d_usable_feat, CL_TRUE, 0, sizeof(unsigned), &usable_feat);

            bufferFree(d_x_feat.data);
            bufferFree(d_y_feat.data);
            bufferFree(d_usable_feat);

            if (usable_feat == 0) {
                feat_pyr[i] = 0;

                bufferFree(d_x_harris);
                bufferFree(d_y_harris);
                bufferFree(d_score_harris);

                if (i > 0 && i == max_levels-1)
                    bufferFree(lvl_img.data);

                continue;
            }

            // Sort features according to Harris responses
            Param d_harris_sorted;
            Param d_harris_idx;

            d_harris_sorted.info.dims[0] = usable_feat;
            d_harris_idx.info.dims[0] = usable_feat;
            d_harris_sorted.info.strides[0] = 1;
            d_harris_idx.info.strides[0] = 1;

            for (int k = 1; k < 4; k++) {
                d_harris_sorted.info.dims[k] = 1;
                d_harris_idx.info.dims[k] = 1;
                d_harris_sorted.info.strides[k] = d_harris_sorted.info.dims[k - 1] * d_harris_sorted.info.strides[k - 1];
                d_harris_idx.info.strides[k] = d_harris_idx.info.dims[k - 1] * d_harris_idx.info.strides[k - 1];
            }

            d_harris_sorted.info.offset = 0;
            d_harris_idx.info.offset = 0;
            d_harris_sorted.data = d_score_harris;
            d_harris_idx.data = bufferAlloc((d_harris_idx.info.dims[0]) * sizeof(unsigned));

            sort0_index<float, false>(d_harris_sorted, d_harris_idx);

            cl::Buffer* d_x_lvl = bufferAlloc(usable_feat * sizeof(float));
            cl::Buffer* d_y_lvl = bufferAlloc(usable_feat * sizeof(float));
            cl::Buffer* d_score_lvl = bufferAlloc(usable_feat * sizeof(float));

            usable_feat = min(usable_feat, lvl_best[i]);

            // Keep only features with higher Harris responses
            const dim_type keep_blk = divup(usable_feat, ORB_THREADS);
            const NDRange local_keep(ORB_THREADS, 1);
            const NDRange global_keep(keep_blk * ORB_THREADS, 1);

            auto kfOp = make_kernel<Buffer, Buffer, Buffer,
                                    Buffer, Buffer, Buffer, Buffer,
                                    const unsigned> (kfKernel[device]);

            kfOp(EnqueueArgs(getQueue(), global_keep, local_keep),
                 *d_x_lvl, *d_y_lvl, *d_score_lvl,
                 *d_x_harris, *d_y_harris, *d_harris_sorted.data, *d_harris_idx.data,
                 usable_feat);
            CL_DEBUG_FINISH(getQueue());

            bufferFree(d_x_harris);
            bufferFree(d_y_harris);
            bufferFree(d_harris_sorted.data);
            bufferFree(d_harris_idx.data);

            cl::Buffer* d_ori_lvl = bufferAlloc(usable_feat * sizeof(float));
            cl::Buffer* d_size_lvl = bufferAlloc(usable_feat * sizeof(float));

            // Compute orientation of features
            const dim_type centroid_blk_x = divup(usable_feat, ORB_THREADS_X);
            const NDRange local_centroid(ORB_THREADS_X, ORB_THREADS_Y);
            const NDRange global_centroid(centroid_blk_x * ORB_THREADS_X, ORB_THREADS_Y);

            auto caOp = make_kernel<Buffer, Buffer, Buffer,
                                    const unsigned, Buffer, KParam,
                                    const unsigned> (caKernel[device]);

            caOp(EnqueueArgs(getQueue(), global_centroid, local_centroid),
                 *d_x_lvl, *d_y_lvl, *d_ori_lvl,
                 usable_feat, *lvl_img.data, lvl_img.info,
                 patch_size);
            CL_DEBUG_FINISH(getQueue());

            Param lvl_filt;
            Param lvl_tmp;

            if (blur_img) {
                lvl_filt = lvl_img;
                lvl_tmp = lvl_img;

                lvl_filt.data = bufferAlloc(lvl_filt.info.dims[0] * lvl_filt.info.dims[1] * sizeof(T));
                lvl_tmp.data = bufferAlloc(lvl_tmp.info.dims[0] * lvl_tmp.info.dims[1] * sizeof(T));

                // Calculate a separable Gaussian kernel
                if (h_gauss == nullptr) {
                    h_gauss = new T[gauss_len];
                    gaussian1D(h_gauss, gauss_len, 2.f);
                    gauss_filter.info.dims[0] = gauss_len;
                    gauss_filter.info.strides[0] = 1;

                    for (int k = 1; k < 4; k++) {
                        gauss_filter.info.dims[k] = 1;
                        gauss_filter.info.strides[k] = gauss_filter.info.dims[k - 1] * gauss_filter.info.strides[k - 1];
                    }

                    dim_type gauss_elem = gauss_filter.info.strides[3] * gauss_filter.info.dims[3];
                    gauss_filter.data = bufferAlloc(gauss_elem * sizeof(T));
                    getQueue().enqueueWriteBuffer(*gauss_filter.data, CL_TRUE, 0, gauss_elem * sizeof(T), h_gauss);
                }

                // Filter level image with Gaussian kernel to reduce noise sensitivity
                convolve2<T, convAccT, 0, false, gauss_len>(lvl_tmp, lvl_img, gauss_filter);
                convolve2<T, convAccT, 1, false, gauss_len>(lvl_filt, lvl_tmp, gauss_filter);

                bufferFree(lvl_tmp.data);
            }

            // Compute ORB descriptors
            cl::Buffer* d_desc_lvl = bufferAlloc(usable_feat * 8 * sizeof(unsigned));
            unsigned* h_desc_lvl = new unsigned[usable_feat * 8];
            for (int j = 0; j < (int)usable_feat * 8; j++)
                h_desc_lvl[j] = 0;
            getQueue().enqueueWriteBuffer(*d_desc_lvl, CL_TRUE, 0, usable_feat * 8 * sizeof(unsigned), h_desc_lvl);
            delete[] h_desc_lvl;

            auto eoOp = make_kernel<Buffer, const unsigned,
                                    Buffer, Buffer, Buffer, Buffer,
                                    Buffer, KParam,
                                    const float, const unsigned> (eoKernel[device]);

            if (blur_img) {
                eoOp(EnqueueArgs(getQueue(), global_centroid, local_centroid),
                     *d_desc_lvl, usable_feat,
                     *d_x_lvl, *d_y_lvl, *d_ori_lvl, *d_size_lvl,
                     *lvl_filt.data, lvl_filt.info,
                     lvl_scl, patch_size);
                CL_DEBUG_FINISH(getQueue());

                bufferFree(lvl_filt.data);
            }
            else {
                eoOp(EnqueueArgs(getQueue(), global_centroid, local_centroid),
                     *d_desc_lvl, usable_feat,
                     *d_x_lvl, *d_y_lvl, *d_ori_lvl, *d_size_lvl,
                     *lvl_img.data, lvl_img.info,
                     lvl_scl, patch_size);
                CL_DEBUG_FINISH(getQueue());
            }

            // Store results to pyramids
            total_feat += usable_feat;
            feat_pyr[i] = usable_feat;
            d_x_pyr[i] = d_x_lvl;
            d_y_pyr[i] = d_y_lvl;
            d_score_pyr[i] = d_score_lvl;
            d_ori_pyr[i] = d_ori_lvl;
            d_size_pyr[i] = d_size_lvl;
            d_desc_pyr[i] = d_desc_lvl;

            if (i > 0 && i == max_levels-1)
                bufferFree(lvl_img.data);
        }

        if (gauss_filter.data != nullptr)
            bufferFree(gauss_filter.data);
        if (h_gauss != nullptr)
            delete[] h_gauss;

        // If no features are found, set found features to 0 and return
        if (total_feat == 0) {
            *out_feat = 0;
            return;
        }

        // Allocate output memory
        x_out.info.dims[0] = total_feat;
        x_out.info.strides[0] = 1;
        y_out.info.dims[0] = total_feat;
        y_out.info.strides[0] = 1;
        score_out.info.dims[0] = total_feat;
        score_out.info.strides[0] = 1;
        ori_out.info.dims[0] = total_feat;
        ori_out.info.strides[0] = 1;
        size_out.info.dims[0] = total_feat;
        size_out.info.strides[0] = 1;

        desc_out.info.dims[0] = 8;
        desc_out.info.strides[0] = 1;
        desc_out.info.dims[1] = total_feat;
        desc_out.info.strides[1] = desc_out.info.dims[0];

        for (int k = 1; k < 4; k++) {
            x_out.info.dims[k] = 1;
            x_out.info.strides[k] = x_out.info.dims[k - 1] * x_out.info.strides[k - 1];
            y_out.info.dims[k] = 1;
            y_out.info.strides[k] = y_out.info.dims[k - 1] * y_out.info.strides[k - 1];
            score_out.info.dims[k] = 1;
            score_out.info.strides[k] = score_out.info.dims[k - 1] * score_out.info.strides[k - 1];
            ori_out.info.dims[k] = 1;
            ori_out.info.strides[k] = ori_out.info.dims[k - 1] * ori_out.info.strides[k - 1];
            size_out.info.dims[k] = 1;
            size_out.info.strides[k] = size_out.info.dims[k - 1] * size_out.info.strides[k - 1];
            if (k > 1) {
                desc_out.info.dims[k] = 1;
                desc_out.info.strides[k] = desc_out.info.dims[k - 1] * desc_out.info.strides[k - 1];
            }
        }

        if (total_feat > 0) {
            size_t out_sz  = total_feat * sizeof(float);
            x_out.data     = bufferAlloc(out_sz);
            y_out.data     = bufferAlloc(out_sz);
            score_out.data = bufferAlloc(out_sz);
            ori_out.data   = bufferAlloc(out_sz);
            size_out.data  = bufferAlloc(out_sz);

            size_t desc_sz = total_feat * 8 * sizeof(unsigned);
            desc_out.data  = bufferAlloc(desc_sz);
        }

        unsigned offset = 0;
        for (unsigned i = 0; i < max_levels; i++) {
            if (feat_pyr[i] == 0)
                continue;

            if (i > 0)
                offset += feat_pyr[i-1];

            getQueue().enqueueCopyBuffer(*d_x_pyr[i], *x_out.data, 0, offset*sizeof(float), feat_pyr[i] * sizeof(float));
            getQueue().enqueueCopyBuffer(*d_y_pyr[i], *y_out.data, 0, offset*sizeof(float), feat_pyr[i] * sizeof(float));
            getQueue().enqueueCopyBuffer(*d_score_pyr[i], *score_out.data, 0, offset*sizeof(float), feat_pyr[i] * sizeof(float));
            getQueue().enqueueCopyBuffer(*d_ori_pyr[i], *ori_out.data, 0, offset*sizeof(float), feat_pyr[i] * sizeof(float));
            getQueue().enqueueCopyBuffer(*d_size_pyr[i], *size_out.data, 0, offset*sizeof(float), feat_pyr[i] * sizeof(float));

            getQueue().enqueueCopyBuffer(*d_desc_pyr[i], *desc_out.data, 0, offset*8*sizeof(unsigned), feat_pyr[i] * 8 * sizeof(unsigned));

            bufferFree(d_x_pyr[i]);
            bufferFree(d_y_pyr[i]);
            bufferFree(d_score_pyr[i]);
            bufferFree(d_ori_pyr[i]);
            bufferFree(d_size_pyr[i]);
            bufferFree(d_desc_pyr[i]);
        }

        // Sets number of output features
        *out_feat = total_feat;
    } catch (cl::Error err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Esempio n. 25
0
/*
 * Serialize an RTSP request (request line, header fields, blank line, content) into a
 * temporary buffer and send it over the given network connection.
 *
 * rtspRequest        request whose method, header buffer and content buffer are serialized
 * url                request target; ignored for OPTIONS requests, which always target "*"
 * networkConnection  connection handed to networkSendMessage()
 *
 * Returns true when the request was sent and the temporary buffer was released. Returns
 * false when the url is missing, or when allocating, formatting, size validation, sending
 * or releasing the buffer fails.
 */
bool rtspRequestSend(RTSPRequest *rtspRequest, char *url, NetworkConnection *networkConnection) {
	uint8_t *requestBuffer;
	size_t maxRequestBufferSize;
	size_t headerSize;
	size_t contentSize;
	const char *requestTarget;
	int charsWritten;

	/* OPTIONS requests always target "*"; every other method needs a url (NULL with "%s" is undefined) */
	requestTarget = rtspRequest->requestMethod == RTSP_METHOD_OPTIONS ? "*" : url;
	if(requestTarget == NULL) {
		logWrite(LOG_LEVEL_ERROR, LOG_COMPONENT_NAME, "Cannot send request without a URL.");
		return false;
	}

	/*
	 * Number of bytes actually copied below. The header buffer ends in a '\0' which is not sent,
	 * hence the -1. When a buffer is absent (or empty) nothing is copied, so nothing is counted;
	 * this keeps the size calculation, the validation and the copies consistent with each other.
	 */
	headerSize = (rtspRequest->headerBuffer != NULL && rtspRequest->headerBufferSize > 0) ? rtspRequest->headerBufferSize - 1 : 0;
	contentSize = rtspRequest->contentBuffer != NULL ? rtspRequest->contentBufferSize : 0;

	/* Create buffer for full request (optimizer will get rid of all the individual constants) */
	maxRequestBufferSize = 12			/* "%s %s RTSP/1.0\r\n" printable characters */
		+ MAX_COMMAND_STRING_SIZE		/* command (first "%s" above) */
		+ MAX_URL_STRING_SIZE			/* url (second "%s" above) */
		+ headerSize				/* header (excluding the terminating '\0') */
		+ 2					/* CR/LF */
		+ contentSize;				/* content */
	if(!bufferAllocate(&requestBuffer, maxRequestBufferSize, "RTSP request buffer")) {
		return false;
	}

	/* Write command */
	charsWritten = snprintf((char *)requestBuffer, maxRequestBufferSize, "%s %s RTSP/1.0\r\n", METHOD_NAMES[rtspRequest->requestMethod], requestTarget);
	if(charsWritten < 0) {
		logWrite(LOG_LEVEL_ERROR, LOG_COMPONENT_NAME, "Cannot write command to request buffer.");
		bufferFree(&requestBuffer);
		return false;
	}

	/*
	 * Validate if amount of buffer is still enough. snprintf returns the length it WANTED to write,
	 * so a value >= the buffer size means the command line was truncated. The second test is written
	 * as a subtraction so that it cannot wrap around.
	 */
	if((size_t)charsWritten >= maxRequestBufferSize || headerSize + 2 + contentSize > maxRequestBufferSize - (size_t)charsWritten) {
		logWrite(LOG_LEVEL_ERROR, LOG_COMPONENT_NAME, "Request buffer is not big enough to hold header and content.");
		bufferFree(&requestBuffer);
		return false;
	}

	/* Write header fields (terminating '\0' is not part of headerSize) */
	if(headerSize > 0) {
		memcpy(requestBuffer + charsWritten, rtspRequest->headerBuffer, headerSize);
		charsWritten += (int)headerSize;
	}

	/* Write header/content separator (length validation done above) */
	requestBuffer[charsWritten] = '\r';
	charsWritten++;
	requestBuffer[charsWritten] = '\n';
	charsWritten++;

	/* Write content */
	if(contentSize > 0) {
		memcpy(requestBuffer + charsWritten, rtspRequest->contentBuffer, contentSize);
		charsWritten += (int)contentSize;
	}

	/* Send out request */
	if(!networkSendMessage(networkConnection, requestBuffer, charsWritten)) {
		bufferFree(&requestBuffer);
		return false;
	}

	/* Write info from this message (precision limits output to the bytes written; buffer is not '\0' terminated) */
	logWrite(LOG_LEVEL_DEBUG, LOG_COMPONENT_NAME, "Sent out RTSP request:\n%.*s", charsWritten, requestBuffer);

	/* Free up resources */
	if(!bufferFree(&requestBuffer)) {
		return false;
	}

	return true;
}