示例#1
0
/* Allocate a new THCStream that wraps a CUDA stream created on the current
 * device with the given creation flags (e.g. cudaStreamNonBlocking) and
 * priority.  The caller owns the returned object (refcount starts at 1).
 * Returns NULL if the host-side allocation fails. */
THCStream* THCStream_newWithPriority(int flags, int priority)
{
  THCStream* self = (THCStream*) malloc(sizeof(THCStream));
  if (self == NULL) {
    /* Previously a failed malloc was dereferenced below and crashed;
     * report failure to the caller instead. */
    return NULL;
  }
  self->refcount = 1;
  THCudaCheck(cudaGetDevice(&self->device));
  THCudaCheck(cudaStreamCreateWithPriority(&self->stream, flags, priority));
  return self;
}
示例#2
0
// Create the wrapped CUDA stream.  When high_priority is requested, query the
// device's supported priority range and create the stream at the greatest
// (numerically lowest) priority; otherwise create a default-priority stream.
CudaStream::CudaStream(bool high_priority = false) {
  if (!high_priority) {
    CUDA_CHECK(cudaStreamCreate(&stream_));
  } else {
    int least_priority = 0;
    int greatest_priority = 0;
    CUDA_CHECK(cudaDeviceGetStreamPriorityRange(&least_priority, &greatest_priority));
    CUDA_CHECK(cudaStreamCreateWithPriority(&stream_, cudaStreamDefault, greatest_priority));
  }
  DLOG(INFO) << "New " << (high_priority ? "high priority " : "") << "stream "
      << stream_ << " on device " << current_device() << ", thread " << std::this_thread::get_id();
}
示例#3
0
// Execute the task graph once using Kahn's algorithm for topological
// ordering, launching each ready node's tasks on its own CUDA stream so
// that independent groups can overlap on the device.
void TaskScheduler::run()
{
    // Kahn's algorithm
    // https://en.wikipedia.org/wiki/Topological_sorting

    auto compareNodes = [] (Node* a, Node* b) {
        // lower number means higher priority
        // NOTE(review): std::priority_queue pops the element that this
        // comparator orders after all others (a max-heap).  With `<` here,
        // the numerically LARGEST priority is popped first; if lower numbers
        // really mean higher priority (as for CUDA stream priorities), the
        // comparison may need to be `>` -- confirm the intended ordering.
        return a->priority < b->priority;
    };
    // S holds nodes whose dependencies are all satisfied and are ready to run.
    std::priority_queue<Node*, std::vector<Node*>, decltype(compareNodes)> S(compareNodes);
    // Pairs each in-flight CUDA stream with the node launched on it.
    std::vector<std::pair<cudaStream_t, Node*>> workMap;

    for (auto& n : nodes)
    {
        // Restore the dependency list consumed by a previous run().
        n->from = n->from_backup;

        // Nodes with no incoming edges are immediately runnable.
        if (n->from.empty())
            S.push(n.get());
    }

    int completed = 0;
    const int total = nodes.size();

    while (true)
    {
        // Check the status of all running kernels
        // Busy-poll in-flight streams, but only while there is nothing ready
        // to launch; otherwise prefer submitting more work first.
        while (completed < total && S.empty())
        {
            for (auto streamNode_it = workMap.begin(); streamNode_it != workMap.end(); )
            {
                auto result = cudaStreamQuery(streamNode_it->first);
                if ( result == cudaSuccess )
                {
                    auto node = streamNode_it->second;

                    debug("Completed group %s ", tasks[node->id].label.c_str());

                    // Return freed stream back to the corresponding queue
                    node->streams->push(streamNode_it->first);

                    // Remove resolved dependencies
                    for (auto dep : node->to)
                    {
                        if (!dep->from.empty())
                        {
                            dep->from.remove(node);
                            // Last dependency resolved: dep becomes runnable.
                            if (dep->from.empty())
                                S.push(dep);
                        }
                    }

                    // Remove task from the list of currently in progress
                    completed++;
                    streamNode_it = workMap.erase(streamNode_it);
                }
                else if (result == cudaErrorNotReady)
                {
                    streamNode_it++;
                }
                else
                {
                    // Any status other than success/not-ready is a real error.
                    // NOTE(review): the iterator is not advanced on this path;
                    // if CUDA_Check does not abort, this loop spins on the
                    // same failed stream forever -- confirm CUDA_Check throws
                    // or terminates.
                    error("Group '%s' raised an error",  tasks[streamNode_it->second->id].label.c_str());
                    CUDA_Check( result );
                }
            }
        }

        if (completed == total)
            break;

        // Launch the next runnable node chosen by the priority queue.
        Node* node = S.top();
        S.pop();

        // Reuse a pooled stream for this node when available; otherwise
        // create a new non-blocking stream at the node's priority.
        cudaStream_t stream;
        if (node->streams->empty())
            CUDA_Check( cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, node->priority) );
        else
        {
            stream = node->streams->front();
            node->streams->pop();
        }

        debug("Executing group %s on stream %lld with priority %d", tasks[node->id].label.c_str(), (int64_t)stream, node->priority);
        workMap.push_back({stream, node});

        // Run each task function whose period divides the current execution
        // count (func_every.second is the "run every N iterations" interval).
        for (auto& func_every : tasks[node->id].funcs)
            if (nExecutions % func_every.second == 0)
                func_every.first(stream);
    }

    nExecutions++;
    // Block until all launched work has finished before returning.
    CUDA_Check( cudaDeviceSynchronize() );
}
示例#4
0
 // Construct a CUDA stream with the requested blocking behavior and priority,
 // then record the priority actually assigned by the runtime in m_priority.
 Stream::Stream( bool blocking, int priority )
   : m_blocking( blocking )
 {
   // Non-blocking streams do not implicitly synchronize with the legacy
   // default stream; blocking streams use the default creation flag.
   const unsigned int flags = m_blocking ? cudaStreamDefault : cudaStreamNonBlocking;
   CUDA_VERIFY( cudaStreamCreateWithPriority( &m_stream, flags, priority ) );
   CUDA_VERIFY( cudaStreamGetPriority( m_stream, &m_priority ) );
 }