Example #1
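        // Sub-allocates 'size' bytes with the requested alignment for command list
        // 'cmdList', carving from the chunk currently held in 'chunk' and falling
        // back to a fresh chunk when the current one cannot hold the request.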
        Block GPUTransientMemoryAllocator::allocate(GPUProgressTracking & progress, GPUTransientChunk & chunk,
                                                    size_t size, size_t alignment,
                                                    SeqNum cmdList)
        {
            size = roundUpToMultiple(size, alignment);

            auto &free = chunk.m_free;
            auto b = free.fitAtBegin(size, alignment);
#if 0
            XOR_GPU_TRANSIENT_VERBOSE("Trying to allocate %zu for list %lld in existing chunk (%lld, %lld).\n",
                                      size, cmdList,
                                      free.begin, free.end);
#endif

            // If the allocation fits in the previous active chunk, just use that.
            if (b)
            {
                free.begin = b.end;
#if 0
                XOR_GPU_TRANSIENT_VERBOSE("    Allocation successful. Chunk is now (%lld, %lld).\n",
                                          free.begin, free.end);
#endif
                return b;
            }
            // If not, get a new chunk.
            else
            {
                XOR_GPU_TRANSIENT_VERBOSE("    Existing chunk cannot hold allocation, getting new chunk for list %lld.\n",
                                          cmdList);

                XOR_CHECK(size <= static_cast<size_t>(m_chunkSize),
                          "Allocation does not fit in one chunk");

                ChunkNumber newChunk = findFreeChunk(progress);
                XOR_CHECK(newChunk >= 0, "There are no free or waitable chunks.");

                m_usedChunks.emplace_back(cmdList, newChunk);

                int64_t begin = newChunk * m_chunkSize;
                free = Block(begin, begin + m_chunkSize);

                auto b = free.fitAtBegin(size, alignment);
                XOR_ASSERT(b.valid(), "Allocation failed with an empty chunk");
                free.begin = b.end;
                return b;
            }
        }
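
The snippet above relies on two helpers that are not shown: roundUpToMultiple() and Block::fitAtBegin(). A minimal sketch of what they might look like, assuming Block is a half-open [begin, end) range of byte offsets within the allocator's backing memory (an illustration, not the project's actual implementation):

#include <cstddef>
#include <cstdint>

// Round 'value' up to the next multiple of 'multiple' (assumed non-zero).
static size_t roundUpToMultiple(size_t value, size_t multiple)
{
    return ((value + multiple - 1) / multiple) * multiple;
}

// Hypothetical Block: a half-open [begin, end) range of byte offsets.
struct Block
{
    int64_t begin = -1;
    int64_t end   = -1;

    Block() = default;
    Block(int64_t b, int64_t e) : begin(b), end(e) {}

    bool valid() const { return begin >= 0 && end >= begin; }
    explicit operator bool() const { return valid(); }

    // Try to place 'size' bytes at the aligned start of this range.
    // Returns the carved sub-block, or an invalid Block if it does not fit.
    Block fitAtBegin(size_t size, size_t alignment) const
    {
        if (!valid())
            return Block();
        int64_t alignedBegin = int64_t(roundUpToMultiple(size_t(begin), alignment));
        if (alignedBegin + int64_t(size) > end)
            return Block();
        return Block(alignedBegin, alignedBegin + int64_t(size));
    }
};

With definitions along these lines, allocate() bumps free.begin past each returned sub-block, so an exhausted chunk simply fails fitAtBegin() and triggers the fall-back to a fresh chunk.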
Example #2
File: pio_server.c Project: AZed/cdo
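/* Estimates the size of each model process's RMA get buffer for the
 * variables assigned to this collector rank, stores the sizes in rxWin[],
 * and returns their sum (checked against MAXWINBUFFERSIZE). */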
static size_t
collDefBufferSizes()
{
  int *streamIndexList, vlistID, nvars, varID, iorank;
  int modelID;
  size_t sumGetBufferSizes = 0;
  int rankGlob = commInqRankGlob ();
  int nProcsModel = commInqNProcsModel ();
  int root = commInqRootGlob ();

  xassert(rxWin != NULL);

  unsigned nstreams = reshCountType ( &streamOps );
  streamIndexList = xmalloc((size_t)nstreams * sizeof (streamIndexList[0]));
  reshGetResHListOfType ( nstreams, streamIndexList, &streamOps );
  for (unsigned streamNo = 0; streamNo < nstreams; streamNo++)
    {
      // space required for data
      vlistID = streamInqVlist ( streamIndexList[streamNo] );
      nvars = vlistNvars ( vlistID );
      for ( varID = 0; varID < nvars; varID++ )
        {
          iorank = vlistInqVarIOrank ( vlistID, varID );
          xassert ( iorank != CDI_UNDEFID );
          if ( iorank == rankGlob )
            {
              for ( modelID = 0; modelID < nProcsModel; modelID++ )
                {
                  int decoChunk;
                  {
                    int varSize = vlistInqVarSize(vlistID, varID);
                    int nProcsModel = commInqNProcsModel();
                    decoChunk =
                      (int)ceilf(cdiPIOpartInflate_
                                 * (float)(varSize + nProcsModel - 1)
                                 / (float)nProcsModel);
                  }
                  xassert ( decoChunk > 0 );
                  rxWin[modelID].size += (size_t)decoChunk * sizeof (double)
                    /* re-align chunks to multiple of double size */
                    + sizeof (double) - 1
                    /* one header for data record, one for
                     * corresponding part descriptor*/
                    + 2 * sizeof (struct winHeaderEntry)
                    /* FIXME: heuristic for size of packed Xt_idxlist */
                    + sizeof (Xt_int) * (size_t)decoChunk * 3;
                  rxWin[modelID].dictSize += 2;
                }
            }
        }
      // space required for the 3 function calls streamOpen, streamDefVlist and
      // streamClose, issued once per stream and timestep for all collprocs,
      // accounted only on the modelproc root
      rxWin[root].size += numRPCFuncs * sizeof (struct winHeaderEntry)
        /* serialized filename */
        + MAXDATAFILENAME
        /* data part of streamDefTimestep */
        + (2 * CDI_MAX_NAME + sizeof (taxis_t));
      rxWin[root].dictSize += numRPCFuncs;
    }
  free ( streamIndexList );

  for ( modelID = 0; modelID < nProcsModel; modelID++ )
    {
      /* account for size header */
      rxWin[modelID].dictSize += 1;
      rxWin[modelID].size += sizeof (struct winHeaderEntry);
      rxWin[modelID].size = roundUpToMultiple(rxWin[modelID].size,
                                              PIO_WIN_ALIGN);
      sumGetBufferSizes += (size_t)rxWin[modelID].size;
    }
  xassert ( sumGetBufferSizes <= MAXWINBUFFERSIZE );
  return sumGetBufferSizes;
}
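
For a rough sense of the arithmetic in the inner loop, the sketch below reproduces the per-variable contribution to one model process's window. The stand-ins for the CDI-pio internals (struct winHeaderEntry, Xt_int, cdiPIOpartInflate_, and the 64-byte alignment used in place of PIO_WIN_ALIGN) are hypothetical placeholders, not the library's definitions:

#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for CDI-pio internals, for illustration only. */
typedef int64_t Xt_int;
struct winHeaderEntry { int id; int specificID; int offset; };
static const float cdiPIOpartInflate_ = 1.1f;   /* placeholder inflation factor */

static size_t roundUpToMultiple(size_t value, size_t multiple)
{
  return ((value + multiple - 1) / multiple) * multiple;
}

/* Window bytes one model process contributes for a single variable of
 * varSize values, mirroring the arithmetic of the inner loop above. */
static size_t perVarWindowBytes(int varSize, int nProcsModel)
{
  int decoChunk = (int)ceilf(cdiPIOpartInflate_
                             * (float)(varSize + nProcsModel - 1)
                             / (float)nProcsModel);
  return (size_t)decoChunk * sizeof (double)
    + sizeof (double) - 1                      /* re-alignment slack */
    + 2 * sizeof (struct winHeaderEntry)       /* data + part descriptor headers */
    + sizeof (Xt_int) * (size_t)decoChunk * 3; /* packed Xt_idxlist heuristic */
}

int main(void)
{
  size_t size = perVarWindowBytes(1000000, 8);
  /* The final buffer size is rounded up to the window alignment; 64 is
   * only a placeholder for PIO_WIN_ALIGN. */
  printf("per-variable bytes, aligned: %zu\n", roundUpToMultiple(size, 64));
  return 0;
}

collDefBufferSizes() then sums these per-variable amounts over all variables assigned to this rank, adds the RPC and header overhead, and asserts that the total stays below MAXWINBUFFERSIZE.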