示例#1
0
文件: ocl.c 项目: mprymek/OpenCL
/* .External */
SEXP ocl_call(SEXP args) {
    struct arg_chain *float_args = 0;
    ocl_call_context_t *occ;
    int on, an = 0, ftype = FT_DOUBLE, ftsize, ftres, async;
    SEXP ker = CADR(args), olen, arg, res, octx, dimVec;
    cl_kernel kernel = getKernel(ker);
    cl_context context;
    cl_command_queue commands;
    cl_device_id device_id = getDeviceID(getAttrib(ker, Rf_install("device")));
    cl_mem output;
    size_t wdims[3] = {0, 0, 0};
    int wdim = 1;

    if (clGetKernelInfo(kernel, CL_KERNEL_CONTEXT, sizeof(context), &context, NULL) != CL_SUCCESS || !context)
	Rf_error("cannot obtain kernel context via clGetKernelInfo");
    args = CDDR(args);
    res = Rf_getAttrib(ker, install("precision"));
    if (TYPEOF(res) == STRSXP && LENGTH(res) == 1 && CHAR(STRING_ELT(res, 0))[0] != 'd')
	ftype = FT_SINGLE;
    ftsize = (ftype == FT_DOUBLE) ? sizeof(double) : sizeof(float);
    olen = CAR(args);  /* size */
    args = CDR(args);
    on = Rf_asInteger(olen);
    if (on < 0)
	Rf_error("invalid output length");
    ftres = (Rf_asInteger(CAR(args)) == 1) ? 1 : 0;  /* native.result */
    if (ftype != FT_SINGLE) ftres = 0;
    args = CDR(args);
    async = (Rf_asInteger(CAR(args)) == 1) ? 0 : 1;  /* wait */
    args = CDR(args);
    dimVec = coerceVector(CAR(args), INTSXP);  /* dim */
    wdim = LENGTH(dimVec);
    if (wdim > 3)
	Rf_error("OpenCL standard only supports up to three work item dimensions - use index vectors for higher dimensions");
    if (wdim) {
	int i; /* we don't use memcpy in case int and size_t are different */
	for (i = 0; i < wdim; i++)
	    wdims[i] = INTEGER(dimVec)[i];
    }
    if (wdim < 1 || wdims[0] < 1 || (wdim > 1 && wdims[1] < 1) || (wdim > 2 && wdims[2] < 1))
	Rf_error("invalid dimensions - muse be a numeric vector with positive values");

    args = CDR(args);
    occ = (ocl_call_context_t*) calloc(1, sizeof(ocl_call_context_t));
    if (!occ) Rf_error("unable to allocate ocl_call context");
    octx = PROTECT(R_MakeExternalPtr(occ, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(octx, ocl_call_context_fin, TRUE);

    occ->output = output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, ftsize * on, NULL, &last_ocl_error);
    if (!output)
	Rf_error("failed to create output buffer of %d elements via clCreateBuffer (%d)", on, last_ocl_error);
    if (clSetKernelArg(kernel, an++, sizeof(cl_mem), &output) != CL_SUCCESS)
	Rf_error("failed to set first kernel argument as output in clSetKernelArg");
    if (clSetKernelArg(kernel, an++, sizeof(on), &on) != CL_SUCCESS)
	Rf_error("failed to set second kernel argument as output length in clSetKernelArg");
    occ->commands = commands = clCreateCommandQueue(context, device_id, 0, &last_ocl_error);
    if (!commands)
	ocl_err("clCreateCommandQueue");
    if (ftype == FT_SINGLE) /* need conversions, create floats buffer */
	occ->float_args = float_args = arg_alloc(0, 32);
    while ((arg = CAR(args)) != R_NilValue) {
	int n, ndiv = 1;
	void *ptr;
	size_t al;
	
	switch (TYPEOF(arg)) {
	case REALSXP:
	    if (ftype == FT_SINGLE) {
		int i;
		float *f;
		double *d = REAL(arg);
		n = LENGTH(arg);
		f = (float*) malloc(sizeof(float) * n);
		if (!f)
		    Rf_error("unable to allocate temporary single-precision memory for conversion from a double-precision argument vector of length %d", n);
		for (i = 0; i < n; i++) f[i] = d[i];
		ptr = f;
		al = sizeof(float);
		arg_add(float_args, ptr);
	    } else {
		ptr = REAL(arg);
		al = sizeof(double);
	    }
	    break;
	case INTSXP:
	    ptr = INTEGER(arg);
	    al = sizeof(int);
	    break;
	case LGLSXP:
	    ptr = LOGICAL(arg);
	    al = sizeof(int);
	    break;
	case RAWSXP:
	    if (inherits(arg, "clFloat")) {
		ptr = RAW(arg);
		ndiv = al = sizeof(float);
		break;
	    }
	default:
	    Rf_error("only numeric or logical kernel arguments are supported");
	    /* no-ops but needed to make the compiler happy */
	    ptr = 0;
	    al = 0;
	}
	n = LENGTH(arg);
	if (ndiv != 1) n /= ndiv;
	if (n == 1) {/* scalar */
	    if ((last_ocl_error = clSetKernelArg(kernel, an++, al, ptr)) != CL_SUCCESS)
		Rf_error("Failed to set scalar kernel argument %d (size=%d, error code %d)", an, al, last_ocl_error);
	} else {
	    cl_mem input = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,  al * n, ptr, &last_ocl_error);
	    if (!input)
		Rf_error("Unable to create buffer (%d elements, %d bytes each) for vector argument %d (oclError %d)", n, al, an, last_ocl_error);
	    if (!occ->mem_objects)
		occ->mem_objects = arg_alloc(0, 32);
	    arg_add(occ->mem_objects, input);
#if 0 /* we used this before CL_MEM_USE_HOST_PTR */
	    if ((last_ocl_error = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, al * n, ptr, 0, NULL, NULL)) != CL_SUCCESS)
		Rf_error("Failed to transfer data (%d elements) for vector argument %d (oclError %d)", n, an, last_ocl_error);
#endif
	    if ((last_ocl_error = clSetKernelArg(kernel, an++, sizeof(cl_mem), &input)) != CL_SUCCESS)
		Rf_error("Failed to set vector kernel argument %d (size=%d, length=%d, error %d)", an, al, n, last_ocl_error);
	    /* clReleaseMemObject(input); */
	}
	args = CDR(args);
    }

    if ((last_ocl_error = clEnqueueNDRangeKernel(commands, kernel, wdim, NULL, wdims, NULL, 0, NULL, async ? &occ->event : NULL)) != CL_SUCCESS)
	ocl_err("Kernel execution");

    if (async) { /* asynchronous call -> get out and return the context */
#if USE_OCL_COMPLETE_CALLBACK
	last_ocl_error = clSetEventCallback(occ->event, CL_COMPLETE, ocl_complete_callback, occ);
#endif
	clFlush(commands); /* the specs don't guarantee execution unless clFlush is called */
	occ->ftres = ftres;
	occ->ftype = ftype;
	occ->on = on;
	Rf_setAttrib(octx, R_ClassSymbol, mkString("clCallContext"));
	UNPROTECT(1);
	return octx;
    }

    clFinish(commands);
    occ->finished = 1;

    /* we can release input memory objects now */
    if (occ->mem_objects) {
      arg_free(occ->mem_objects, (afin_t) clReleaseMemObject);
      occ->mem_objects = 0;
    }
    if (float_args) {
      arg_free(float_args, 0);
      float_args = occ->float_args = 0;
    }

    res = ftres ? Rf_allocVector(RAWSXP, on * sizeof(float)) : Rf_allocVector(REALSXP, on);
    if (ftype == FT_SINGLE) {
	if (ftres) {
	  if ((last_ocl_error = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * on, RAW(res), 0, NULL, NULL )) != CL_SUCCESS)
		Rf_error("Unable to transfer result vector (%d float elements, oclError %d)", on, last_ocl_error);
	    PROTECT(res);
	    Rf_setAttrib(res, R_ClassSymbol, mkString("clFloat"));
	    UNPROTECT(1);
	} else {
	    /* float - need a temporary buffer */
	    float *fr = (float*) malloc(sizeof(float) * on);
	    double *r = REAL(res);
	    int i;
	    if (!fr)
		Rf_error("unable to allocate memory for temporary single-precision output buffer");
	    occ->float_out = fr;
	    if ((last_ocl_error = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * on, fr, 0, NULL, NULL )) != CL_SUCCESS)
		Rf_error("Unable to transfer result vector (%d float elements, oclError %d)", on, last_ocl_error);
	    for (i = 0; i < on; i++)
		r[i] = fr[i];
	}
    } else if ((last_ocl_error = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(double) * on, REAL(res), 0, NULL, NULL )) != CL_SUCCESS)
	Rf_error("Unable to transfer result vector (%d double elements, oclError %d)", on, last_ocl_error);

    ocl_call_context_fin(octx);
    UNPROTECT(1);
    return res;
}
示例#2
0
/**
* selfTest() depends upon periodic purging.   Every call, it will first fill up the database, taking memory snapshots,
* then purge, taking more memory snapshots.   It creates a number of random data objects, inserts them into the system,
* and records current memory usage into a file called 'mem.results', for every interation.   It does in the following steps:
*
* 1.  Add, approximately, 20 DO's per second, done every poll period (e.g. 10 seconds, means add 200 for that interval).
*
* 2.  When the number of DO's in the system match the threshold setting, we drop the threshold by the same number
* we increased it in step 1.   In this case, we decrease it by 20, allowing the memory threshold purger to do its job.
*
* 3.  Repeat step 1.
*
* 4.  Repeat step 2.
*
* 5.  Reset system functions to original defaults.
*
* This allows the system to approach maximum usage, drop to zero, back to maximum usage, then back to zero.
* The file can be parse into a plot showing how much memory is freed.   This test works regardless of using
* in memory database, the improved all memory database, or disk based database.
* 
* WARNING: We use mallinfo() to determine the amount of memory used and released, as the system does NOT
* see any memory freed at all, due to the fact dlmallopt(-1) is set in main.cpp.  This tells free to not
* return freed memory to the system.    Thus, using mallinfo is the only means available to show how much
* memory is actually freed to the application (but not to the system).
* 
*  
*/
void
CacheStrategyUtility::selfTest()
{
    static int init=0;
    static float amount_do=0;
    static int count=0;
    char countStr[50];
    static float threshold_backup;
    static char *direction;
    if (!init) {
       init=1;
       threshold_backup=db_size_threshold;
       amount_do=20.0*pollPeriodMs/1000; //20 per second seems reasonable
       if (amount_do > db_size_threshold/10) {
         amount_do = db_size_threshold/10;
       }
       direction="Start";

    }   // JM: Start DB purging
        // JM: Testing code only, to prove it works.   Lets do linear testing.
        struct mallinfo mi=mallinfo();
	//Due to file permission difficulties in android, we'll write it as a log
        HAGGLE_DBG("\nThreshold(%s): %lld/%d -- Used bytes: %d, Free bytes: %d, SQL: %lld\n\n", direction, db_size_threshold,current_num_do, mi.uordblks, mi.fordblks, sqlite3_memory_used());
        //send db_size_threshold DO's
	//init = 1 means send DO's
	//init = 2 means we are in purging mode
        if ((init == 1) || (init == 3)) {
          float upperlimit=current_num_do+amount_do;
          if (upperlimit > db_size_threshold) {
            upperlimit = db_size_threshold+1;
	    init++;
          }
          if (init ==1) {
             direction="Up1";
          } else if (init == 3) {
             direction="Up2";
          } else { 
             direction="StateChangeFromUp";
          }

          for(int i=current_num_do; i<upperlimit; i++) {
	    DataObjectRef dObj = createDataObject(2);
	    dObj->addAttribute("ContentOriginator", "self");
	    dObj->addAttribute("ContentType", "DelByRelTTL");
	    dObj->addAttribute("ContentType2", "DelByAbsTTL");
	    dObj->addAttribute("purge_by_timestamp", "2000000000");
	    dObj->addAttribute("purge_after_seconds", "2000000000");
            char buffer[1025];
            snprintf(buffer, 1024, "%llu", (unsigned long long)time(NULL));
            sprintf(countStr, "%d", count++);
	    dObj->addAttribute("ContentCreationTime", buffer);
	    dObj->addAttribute("count", countStr);
            dObj->calcId();
	    _handleNewDataObject(dObj);
          }
         } else if ((init == 2) || (init == 4)) {   //init==2, reduction
            db_size_threshold -= amount_do;
            if (db_size_threshold < 0.0) {
              init++;
              db_size_threshold=threshold_backup;
            }
            if (init == 2) {
               direction="Down1";
            } else if (init == 4) {
               direction="Down2";
            } else {
               direction="StateChangeFromDown";
            }
         } else { //if (init == 5) 
           //clear seltTest?
           self_test = false;
           db_size_threshold=threshold_backup;
           HAGGLE_DBG("Self Test completed!\n");
           //remove any last DO's
           //write any STAT information
           getKernel()->shutdown();
           //return;
         }
        // JM: End testing
}
示例#3
0
ProtocolEvent ProtocolUDPGeneric::receiveData(
    void *buf, 
    size_t len, 
    const int flags, 
    size_t *bytes)
{
    *bytes = 0;

    ProtocolEvent pEvent;
    ssize_t sbytes;

    SocketWrapper *receiveSocket = getReadEndOfReceiveSocket();
    if (NULL == receiveSocket) {
        HAGGLE_ERR("%s Could not get receive socket\n", getName());
        return PROT_EVENT_ERROR_FATAL;
    }

    size_t newLength = len + sizeof(udpmsg_t);
    void *newBuff = malloc(newLength);
    bzero(newBuff, newLength);

    pEvent = receiveSocket->receiveData(newBuff, newLength, flags, &sbytes);
    if (sbytes < 0 || PROT_EVENT_SUCCESS != pEvent) {
        HAGGLE_ERR("%s Receive failed\n", getName());
        free(newBuff);
        return PROT_EVENT_ERROR;
    }
    *bytes = static_cast<size_t>(sbytes) - sizeof(udpmsg_t);

    if (*bytes <= 0) {
        HAGGLE_ERR("%s Receive failed (size was wrong)\n", getName());
        free(newBuff);
        return PROT_EVENT_ERROR;
    }

    memcpy(lastReceivedSessionNo, getSessionNoFromMsg((const char *)newBuff), sizeof(DataObjectId_t));
    lastReceivedSeqNo = getSeqNoFromMsg((const char *)newBuff);
    unsigned long rcvSrcIP = getSrcIPFromMsg((const char *)newBuff);
    //unsigned long rcvDestIP = getDestIPFromMsg((const char *)newBuff);

    HAGGLE_DBG2("%s Got packet with session no %s, sequence no %d, and size %d\n",
        getName(), 
	DataObject::idString(lastReceivedSessionNo).c_str(), 
        lastReceivedSeqNo, 
        *bytes);

    memcpy(buf, (void *)&(((char *)newBuff)[sizeof(udpmsg_t)]), len);

    free(newBuff);

    if (rcvSrcIP == srcIP) {
        HAGGLE_ERR("%s Somehow received our own UDP packet: %d\n", getName(), srcIP);
        return PROT_EVENT_ERROR;
    }

    // MOS: START SETTING PEER NODE
    Mutex::AutoLocker l(mutex); // MOS
    if (!peerIface) {
        HAGGLE_ERR("%s UDP peer interface was null\n", getName());
        return PROT_EVENT_ERROR;
    }

    // MOS - Protocol.cpp relies on peerNode for debugging
    NodeRef peer = getKernel()->getNodeStore()->retrieve(peerIface);
    if(peer) peerNode = peer;

    if (!peerNode) {
        peerNode = Node::create(Node::TYPE_UNDEFINED, "Peer node");      
        if (!peerNode) {      
	    HAGGLE_ERR("%s Could not create peer node\n", getName());
            return PROT_EVENT_ERROR;
        }
        peerNode->addInterface(peerIface);
    }
    // MOS: END SETTING PEER NODE

    return pEvent;
}
示例#4
0
ProtocolEvent ProtocolUDPGeneric::receiveDataObjectNoControl()
{
    ProtocolEvent pEvent;
    size_t len;
    pEvent = receiveData(buffer, bufferSize, MSG_DONTWAIT, &len);
    if (pEvent != PROT_EVENT_SUCCESS) {
        return pEvent;
    }

    buffer[bufferSize-1] = '\0';

    if (len == 0) {
        HAGGLE_DBG("%s Received zero-length message\n", getName());
        return PROT_EVENT_ERROR;
    }

    if(lastReceivedSessionNo == lastValidReceivedSessionNo && lastReceivedSeqNo == lastValidReceivedSeqNo) {
      HAGGLE_DBG("%s Ignoring duplicate message - session no %s sequence no %d\n", getName(), DataObject::idString(lastValidReceivedSessionNo).c_str(), lastReceivedSeqNo);
      return PROT_EVENT_SUCCESS;
    }

    memcpy(lastValidReceivedSessionNo, lastReceivedSessionNo, sizeof(DataObjectId_t)); 
    lastValidReceivedSeqNo = lastReceivedSeqNo; 

    // MOS - fastpath based on session id = data object id
    
    if (getKernel()->getThisNode()->getBloomfilter()->has(lastValidReceivedSessionNo)) {
      HAGGLE_DBG("%s Data object (session no %s) already in bloom filter - no event generated\n", getName(), DataObject::idString(lastValidReceivedSessionNo).c_str()); 
	dataObjectsNotReceived += 1; // MOS
        return PROT_EVENT_SUCCESS;
    }

    
    // MOS - quickly add to Bloom filter to reduce redundant processing in other procotols
    getKernel()->getThisNode()->getBloomfilter()->add(lastValidReceivedSessionNo);

    DataObjectRef dObj = DataObject::create_for_putting(localIface,
                                                        peerIface,  
                                                        getKernel()->getStoragePath());
    if (!dObj) {
        HAGGLE_DBG("%s Could not create data object\n", getName());
        return PROT_EVENT_ERROR;
    }

    size_t bytesRemaining = DATAOBJECT_METADATA_PENDING;
    ssize_t bytesPut = dObj->putData(buffer, len, &bytesRemaining, true);

    if (bytesPut < 0) {
        HAGGLE_ERR("%s Could not put data\n", getName());
        return PROT_EVENT_ERROR;
    }

    if(bytesRemaining != len - bytesPut) {
        HAGGLE_ERR("%s Received data object not complete - discarding\n", getName());
        return PROT_EVENT_ERROR;
    }

    HAGGLE_DBG("%s Metadata header received [%s].\n", getName(), dObj->getIdStr());
    
    dObj->setReceiveTime(Timeval::now());

    // MOS - the following was happening after posting INCOMING but that distorts the statistics
    HAGGLE_DBG("%s %ld bytes data received (including header), %ld bytes put\n", getName(), len, bytesPut);
    dataObjectsIncoming += 1; // MOS
    if(!dObj->isControlMessage()) dataObjectsIncomingNonControl += 1; // MOS
    dataObjectBytesIncoming += len; // MOS

    HAGGLE_DBG("%s Received data object [%s] from node %s\n", getName(), 
	       DataObject::idString(dObj).c_str(), peerDescription().c_str()); 
    // MOS - removed interface due to locking issue

    if (getKernel()->getThisNode()->getBloomfilter()->hasParentDataObject(dObj)) {
        HAGGLE_DBG("%s Data object [%s] already in bloom filter - no event generated\n", getName(), DataObject::idString(dObj).c_str()); 
	dataObjectsNotReceived += 1; // MOS
        return PROT_EVENT_SUCCESS;
    }

    NodeRef node = Node::create(dObj);
    if (node && (node == getKernel()->getThisNode())) {
        HAGGLE_DBG("%s Received own node description, discarding early.\n", getName());
	dataObjectsNotReceived += 1; // MOS
        return PROT_EVENT_SUCCESS;
    }

    // MOS - this now happens even before xml parsing
    // getKernel()->getThisNode()->getBloomfilter()->add(dObj);

    if(bytesRemaining > 0) {
      ssize_t bytesPut2 = dObj->putData(buffer + bytesPut, len - bytesPut, &bytesRemaining, false);
      HAGGLE_DBG("%s processing payload - %ld bytes put\n", getName(), bytesPut2);

      if (bytesPut2 < 0) {
        HAGGLE_ERR("%s Could not put data\n", getName());
        return PROT_EVENT_ERROR;
      }
      
      if(bytesRemaining != 0) {
        HAGGLE_ERR("%s Received data object not complete - discarding\n", getName());
        return PROT_EVENT_ERROR;
      }
    }

    NodeRef currentPeer;
    { Mutex::AutoLocker l(mutex); currentPeer = peerNode; } // MOS

    // Generate first an incoming event to conform with the base Protocol class
    getKernel()->addEvent(new Event(EVENT_TYPE_DATAOBJECT_INCOMING, dObj, currentPeer));

    receiveDataObjectSuccessHook(dObj);

    // Since there is no data following, we generate the received event immediately 
    // following the incoming one
    getKernel()->addEvent(new Event(EVENT_TYPE_DATAOBJECT_RECEIVED, dObj, currentPeer));

    dataObjectsReceived += 1; // MOS
    dataObjectBytesReceived += len; // MOS
    if(dObj->isNodeDescription()) { nodeDescReceived += 1; nodeDescBytesReceived += len; } // MOS

    return PROT_EVENT_SUCCESS;
}
示例#5
0
bool ProtocolUDPGeneric::init_derived()
{
    if (!peerIface) {
        HAGGLE_ERR("%s No peer interface\n", getName());
        return false;
    }

    if (!localIface) {
        HAGGLE_ERR("%s No local interface\n", getName());
        return false;
    }

    SOCKET sockets[2];
    if (0 > socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets)) {
        HAGGLE_ERR("%s Could not create socket pair for receiver: %s\n", getName(), STRERROR(ERRNO));
        HAGGLE_ERR("FATAL ERROR - EMERGENCY SHUTDOWN INITIATED\n"); // MOS
	getKernel()->setFatalError();
	getKernel()->shutdown();
        return false;
    }
    else
      {
        HAGGLE_DBG2("%s Opening socketpair (%d,%d)\n", getName(), sockets[0], sockets[1]);
      }

    writeEndOfReceiveSocket =
        new SocketWrapper(getKernel(), getManager(), sockets[0]);

    if (!writeEndOfReceiveSocket) {
        HAGGLE_ERR("%s Could not allocate write end\n", getName());
        return false;
    }

    if (!writeEndOfReceiveSocket->multiplySndBufferSize(2)) {
        HAGGLE_ERR("%s Could not multiply buffer size.\n", getName());
        return false;
    }

    readEndOfReceiveSocket =
        new SocketWrapper(getKernel(), getManager(), sockets[1]);

    if (!readEndOfReceiveSocket) {
        HAGGLE_ERR("%s Could not allocate read end\n", getName());
        return false;
    }

    if (!readEndOfReceiveSocket->multiplyRcvBufferSize(2)) {
        HAGGLE_ERR("%s Could not multiply buffer size.\n", getName());
        return false;
    }

    srcIP = interfaceToIP(localIface);

    if (0 == srcIP) {
        HAGGLE_ERR("%s Could not get source IP.\n");
        return false;
    }

    destIP = interfaceToIP(peerIface);

    if (0 == destIP) {
        HAGGLE_ERR("%s Could not get dest IP.\n");
        return false;
    }

    return initbase();
}