示例#1
0
void Scheduler::signalProcessTermination(ParallelProcess *proc, ProcessRecord *rec) {
#if defined(DEBUG_SCHED)
	Log(rec->logLevel, "Process %i is complete.", rec->id);
#endif
	for (size_t i=0; i<m_workers.size(); ++i)
		m_workers[i]->signalProcessTermination(rec->id);
	/* The parallel process has been completed. Decrease the reference count
		of all used resources */
	const ParallelProcess::ResourceBindings &bindings = proc->getResourceBindings();
	for (ParallelProcess::ResourceBindings::const_iterator it = bindings.begin();
		it != bindings.end(); ++it) {
		unregisterResource((*it).second);
	}
	rec->done->set(true);
	m_processes.erase(proc);
	m_localQueue.erase(std::remove(m_localQueue.begin(), m_localQueue.end(), rec->id), 
		m_localQueue.end());
	m_remoteQueue.erase(std::remove(m_remoteQueue.begin(), m_remoteQueue.end(), rec->id), 
		m_remoteQueue.end());
	proc->m_returnStatus = ParallelProcess::ESuccess;
	m_idToProcess.erase(rec->id);
	delete rec;
	proc->decRef();
}
示例#2
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {

  unsigned int cmd, glhandle, gltarget, direction, flags, keepmapped, ispbo;
  unsigned long nrbytes;
  void *gpuptr;
  int slot = 0;
  cudaGraphicsResource_t resource = NULL;
  struct cudaArray *mappedArray = NULL;
  void* mappedPtr = NULL;
  size_t mappedSize = 0;

  /* Be optimistic, assume success unless told otherwise: */
  cudastatus = cudaSuccess;
  
  if (firsttime) {
      firsttime = 0;

      mexPrintf("\n%s: A simple CUDA <=> OpenGL interoperation interface.\n", mexFunctionName());
      mexPrintf("(c) 2013 by Mario Kleiner. Licensed to you under the MIT license.\n\n");

      /* Reset cache clock to zero and clear the cache: */
      cacheclock = 0;
      memset(resourceCache, 0, sizeof(resourceCache[0]) * MAX_CACHE_SLOTS);

      /* Start off with an effective cache capacity of 8 slots (1 slot is blocked from use): */
      cachesize = 8 + 1;
      firstLRUCycle = 1;

      /* Make sure the cache is flushed at mex file shutdown time: */
      mexAtExit(mexExit);
  }
  
  /* Retrieve command code: Give usage info if none given. */
  if (nrhs < 1) { usageExit(0); return; }

  cmd = (unsigned int) mxGetScalar(prhs[0]);

  /* Change of verbosity? */
  if (cmd == 6) {
    if (nrhs < 2) usageExit(1);  
    verbose = (unsigned int) mxGetScalar(prhs[1]);
    if (verbose) mexPrintf("\n%s: Verbose tracing of operations enabled.\n", mexFunctionName());
    return;
  }

  /* Resizing the LRU cache requested? */
  if (cmd == 5) {
    if (nrhs < 2) usageExit(1);

    /* Reset LRU cache full warning: */
    firstLRUCycle = 1;
    
    slot = (unsigned int) mxGetScalar(prhs[1]);
    
    /* Increment request by 1 to compensate for the "lost" slot 0: */
    slot = slot + 1;
    
    /* Child protections: */
    if (slot > MAX_CACHE_SLOTS) {
      mexPrintf("%s: Requested new softlimit %i for cache exceeds compiled in maximum %i. Will clamp to maximum.\n", mexFunctionName(), slot - 1, MAX_CACHE_SLOTS - 1);
      cachesize = MAX_CACHE_SLOTS;
      return;
    }
    
    if (slot < cachesize) {
      /* Shrinking the cache requested. This implies a full cache flush: */
      mexPrintf("%s: Requested new softlimit %i for cache is smaller than old softlimit %i. Will flush the cache before shrinking it.\n", mexFunctionName(), slot - 1, cachesize - 1);
      cacheFlush();
    }
    
    /* Set new softlimit: */
    cachesize = slot;
    mexPrintf("%s: New softlimit for LRU cache set to %i slots.\n", mexFunctionName(), cachesize - 1);
    
    return;
  }

  if (cmd == 0) {
    /* Cache flush requested: */
    cacheFlush();
    return;
  }

  /* Following ops require at least object handle and target type: */
  if (nrhs < 3) usageExit(1);

  /* Time to increment the age of our cached items by a clock tick: */
  ageCache();

  /* Retrieve OpenGL object handle to our image buffer: */
  glhandle = (unsigned int) mxGetScalar(prhs[1]);

  /* Get GLEnum target: */
  gltarget = (unsigned int) mxGetScalar(prhs[2]);
  
  if (cmd == 1) {
    /* Unmap resource if it is mapped: */
    unmapResource(glhandle, gltarget);
    return;
  }

  if (cmd == 2) {
    /* Unmap and unregister resource if it is mapped and/or registered: */
    unregisterResource(glhandle, gltarget);
    return;
  }

  if (nrhs < 6) usageExit(1);

  /* Retrieve CUDA memory pointer to source/destination CUDA memory buffer: */
  gpuptr = (void*) (unsigned long) mxGetScalar(prhs[3]);
  
  /* Retrieve number of bytes to copy: */
  nrbytes = (unsigned long) mxGetScalar(prhs[4]);
  
  /* Retrieve direction: 0 = OpenGL -> CUDA, 1 = CUDA -> OpenGL : */
  direction = (unsigned int) mxGetScalar(prhs[5]);

  /* Retrieve optional 'keepmapped' flag. */
  keepmapped = 0;
  if (nrhs >= 7) keepmapped = (unsigned int) mxGetScalar(prhs[6]);

  /* Define CUDA optimization flags, depending if this is a OpenGL->CUDA or
   * CUDA->OpenGL copy operation.
   */
  if ((nrhs >= 8) && (mxGetScalar(prhs[7]) >= 0)) {
    /* Override map flags provided. Use them: */
    flags = (unsigned int) mxGetScalar(prhs[7]);
  }
  else {
    /* Use auto-selected map flags: */
    flags = (direction) ? cudaGraphicsRegisterFlagsWriteDiscard : cudaGraphicsRegisterFlagsReadOnly;
  }

  /* Is gltarget a OpenGL pixelbuffer object? Check for gltarget == GL_PACK_BUFFER or GL_UNPACK_BUFFER. */
  ispbo = (gltarget == 35051 || gltarget == 35052) ? 1 : 0;

  /* Copy of data or mapped resource access pointer requested? */
  if (cmd == 3 || cmd == 4) {
    /* Register OpenGL object with CUDA as 'resource': */
    
    /* Already in cache? This would mean it is registered already with compatible mapping flags: */
    slot = cacheInsert(glhandle, gltarget, flags);
    if (slot < 0) {
      /* Not yet in cache. This means it is not registered at this time, either because it
         wasn't registered at all, or because it was registered with incompatible 'flags',
         so it just got unregistered and expelled from the cache. In any case, we need to
         insert it into the cache and register it. -slot is the free target slot for this
         purpose.
       */
       
      /* Turn slot into something useful: */
      slot = -slot;
    
      if (ispbo) {
        /* OpenGL Pixelbuffer object (GL_PACK_BUFFER or GL_UNPACK_BUFFER): */
        cudastatus = cudaGraphicsGLRegisterBuffer(&(resourceCache[slot].resource), glhandle, flags);
      }
      else {
        /* OpenGL texture or renderbuffer object: */
        cudastatus = cudaGraphicsGLRegisterImage(&(resourceCache[slot].resource), glhandle, gltarget, flags);
      }
      
      if (cudastatus != cudaSuccess) {
        mexPrintf("\nmemcpyCudaOpenGL: ERROR in %s(): %s\n", (ispbo) ? "cudaGraphicsGLRegisterBuffer" : "cudaGraphicsGLRegisterImage", cudaGetErrorString(cudastatus));
        resourceCache[slot].resource = NULL;
        goto err_final;
      }

      if (verbose) mexPrintf("\n%s: cacheInsert(%i): CUDA resource registered (globject %i, gltarget %i, flags %i).\n", mexFunctionName(), slot, glhandle, gltarget, flags);
      
      /* Fill cache slot: */
      resourceCache[slot].glhandle = glhandle;
      resourceCache[slot].gltarget = gltarget;
      resourceCache[slot].mapflags = flags;
      resourceCache[slot].lastaccess = cacheclock;
      resourceCache[slot].ismapped = 0;
    }
      
    /* At this point, the resource is stored in slot 'slot' of the cache and registered in a compatible way: */
    
    /* Map the 'resource', unless it is already mapped: */
    if (!resourceCache[slot].ismapped) {
      /* Map it: */
      cudastatus = cudaGraphicsMapResources(1, &(resourceCache[slot].resource), 0);
      if (cudastatus != cudaSuccess) {
        mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaGraphicsMapResources(): %s\n", cudaGetErrorString(cudastatus));
        goto err_unregister;
      }

      if (verbose) mexPrintf("\n%s: CUDA resource %i mapped (globject %i, gltarget %i, flags %i).\n", mexFunctionName(), slot, glhandle, gltarget, flags);
      
      /* Successfully mapped: */
      resourceCache[slot].ismapped = 1;
    }
    
    /* Get simpler handle: */
    resource = resourceCache[slot].resource;
    
    /* Get mapped resource image array handle or PBO pointer: */
    if (ispbo) {
      cudastatus = cudaGraphicsResourceGetMappedPointer(&mappedPtr, &mappedSize, resource);
    }
    else {
      cudastatus = cudaGraphicsSubResourceGetMappedArray(&mappedArray, resource, 0, 0);
    }
    
    if (cudastatus != cudaSuccess) {
      mexPrintf("\nmemcpyCudaOpenGL: ERROR in %s(): %s\n", (ispbo) ? "cudaGraphicsResourceGetMappedPointer" : "cudaGraphicsSubResourceGetMappedArray", cudaGetErrorString(cudastatus));
      goto err_unmap;
    }
  }
  
  /* Copy of PBO data between CUDA and OpenGL requested? */
  if (cmd == 3 && ispbo) {
    /* Copy from OpenGL PBO to CUDA buffer? */
    if (direction == 0) {
        /* OpenGL -> CUDA copy: */
        cudastatus = cudaMemcpyAsync(gpuptr, (const void*) mappedPtr, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0);
        if (cudastatus != cudaSuccess) {
            mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyAsync(): %s\n", cudaGetErrorString(cudastatus));
            goto err_unmap;
        }
    }

    if (direction == 1) {
        /* CUDA -> OpenGL copy: */
        cudastatus = cudaMemcpyAsync(mappedPtr, (const void*) gpuptr, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0);
        if (cudastatus != cudaSuccess) {
            mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyAsync(): %s\n", cudaGetErrorString(cudastatus));
            goto err_unmap;
        }
    }
  }

  /* Copy of texture or renderbuffer data between CUDA and OpenGL requested? */
  if (cmd == 3 && !ispbo) {
    /* Copy from OpenGL object to CUDA buffer? */
    if (direction == 0) {
        /* OpenGL -> CUDA copy: */
        cudastatus = cudaMemcpyFromArrayAsync(gpuptr, (const struct cudaArray*) mappedArray, 0, 0, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0);
        if (cudastatus != cudaSuccess) {
            mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyFromArray(): %s\n", cudaGetErrorString(cudastatus));
            goto err_unmap;
        }
    }

    if (direction == 1) {
        /* CUDA -> OpenGL copy: */
        cudastatus = cudaMemcpyToArrayAsync((struct cudaArray*) mappedArray, 0, 0, (const void*) gpuptr, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0);
        if (cudastatus != cudaSuccess) {
            mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyToArray(): %s\n", cudaGetErrorString(cudastatus));
            goto err_unmap;
        }
    }
  }
  
  /* Return of pointers to mapped resource requested? */
  if (cmd == 4) {
    /* Yes: This implies we must not unmap the resource now, as otherwise the
     * returned pointers would be dead on arrival.
     */
    keepmapped = 1;
    
    /* Cast pointer to void* then store it in a 64-Bit unsigned integer return value: */
    plhs[0] = mxCreateNumericMatrix(1, 1, mxUINT64_CLASS, mxREAL);
    *((unsigned long long*) mxGetData(plhs[0])) = (unsigned long long) (void*) ((ispbo) ? mappedPtr : mappedArray);
  }
  
  /* Keep resource mapped? */
  if (slot && !keepmapped) doCudaUnmap(slot);
  
  /* Successfully completed: */
  return;
  
  /* Error handling -- Unwind in reverse order: */
  
err_unmap:
  
  /* Unmap the 'resource': */
  unmapResource(glhandle, gltarget);

err_unregister:
  
  /* Unregister the 'resource': */
  unregisterResource(glhandle, gltarget);

err_final:      
      
  if (cudastatus != cudaSuccess) mexErrMsgTxt("Error in memcpyCudaOpenGL(), reason see above.");
}
示例#3
0
bool Scheduler::cancel(ParallelProcess *process, bool reduceInflight) {
	m_mutex->lock();
	std::map<const ParallelProcess *, ProcessRecord *>::iterator it = 
		m_processes.find(process);
	if (it == m_processes.end()) {
		m_mutex->unlock();
#if defined(DEBUG_SCHED)
		Log(EDebug, "Scheduler::cancel() - the process is not currently running");
#endif
		return false;
	}

	ProcessRecord *rec = (*it).second;
	if (reduceInflight) {
		--rec->inflight;
		rec->cond->signal();
	}

	if (rec->cancelled) {
#if defined(DEBUG_SCHED)
		Log(rec->logLevel, "Scheduler::cancel() - the process is already being cancelled. "
			"Waiting until this has happened..");
#endif
		m_mutex->unlock();
		wait(process);
		return true;
	}

#if defined(DEBUG_SCHED)
	Log(rec->logLevel, "Cancelling process %i (%i work units in flight)..", rec->id, rec->inflight);
#endif

	for (size_t i=0; i<m_workers.size(); ++i)
		m_workers[i]->signalProcessCancellation(rec->id);

	/* Ensure that this process won't be scheduled again */
	m_localQueue.erase(std::remove(m_localQueue.begin(), m_localQueue.end(), rec->id), 
		m_localQueue.end());
	m_remoteQueue.erase(std::remove(m_remoteQueue.begin(), m_remoteQueue.end(), rec->id), 
		m_remoteQueue.end());

	/* Ensure that the process won't be considered 'done' when the
	   last in-flight work unit is returned */
	rec->morework = true;
	rec->cancelled = true;

	/* Now wait until no more work from this process circulates and release
	   the lock while waiting. */
	while (rec->inflight != 0) 
		rec->cond->wait();

	/* Decrease the reference count of all bound resources */
	const ParallelProcess::ResourceBindings &bindings = process->getResourceBindings();
	for (ParallelProcess::ResourceBindings::const_iterator it = bindings.begin();
		it != bindings.end(); ++it) {
		unregisterResource((*it).second);
	}

	m_processes.erase(process);
	m_idToProcess.erase(rec->id);
	process->m_returnStatus = ParallelProcess::EFailure;

	try {
		process->handleCancellation();
	} catch (const std::exception &ex) {
		Log(EWarn, "Process %i's cancellation handler threw an exception.", ex.what());
	}

	/* Wake up any threads waiting on this process */
	rec->done->set(true);
	process->decRef();

#if defined(DEBUG_SCHED)
	Log(rec->logLevel, "Process %i was cancelled.", rec->id);
#endif

	delete rec;

	m_mutex->unlock();
	return true;
}