Example #1
0
void* allocPages(void* addr, size_t len, size_t align)
{
    RELEASE_ASSERT(len < INT_MAX - align);
    ASSERT(len >= kPageAllocationGranularity);
    ASSERT(!(len & kPageAllocationGranularityOffsetMask));
    ASSERT(align >= kPageAllocationGranularity);
    ASSERT(!(align & kPageAllocationGranularityOffsetMask));
    ASSERT(!(reinterpret_cast<uintptr_t>(addr) & kPageAllocationGranularityOffsetMask));
    size_t alignOffsetMask = align - 1;
    size_t alignBaseMask = ~alignOffsetMask;
    ASSERT(!(reinterpret_cast<uintptr_t>(addr) & alignOffsetMask));
    // If the client passed null as the address, choose a good one.
    if (!addr) {
        addr = getRandomPageBase();
        addr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(addr) & alignBaseMask);
    }

    // The common case, which is also the least work we can do, is that the
    // address and length are suitable. Just try it.
    void* ret = systemAllocPages(addr, len);
    // If the alignment is to our liking, we're done.
    if (!(reinterpret_cast<uintptr_t>(ret) & alignOffsetMask))
        return ret;

    // Annoying. Unmap and map a larger range to be sure to succeed on the
    // second, slower attempt.
    freePages(ret, len);

    size_t tryLen = len + (align - kPageAllocationGranularity);

    // We loop to cater for the unlikely case where another thread maps on top
    // of the aligned location we choose.
    int count = 0;
    while (count++ < 100) {
        ret = systemAllocPages(addr, tryLen);
        // We can now try and trim out a subset of the mapping.
        addr = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(ret) + alignOffsetMask) & alignBaseMask);

        // On POSIX systems, we can trim the oversized mapping to fit the
        // requested size and alignment exactly; this always succeeds there.
        if (trimMapping(ret, tryLen, addr, len))
            return addr;

        // On Windows, we can't trim an existing mapping, so we unmap and remap
        // a subset. We used to do this for all platforms, but OSX 10.8 has a
        // broken mmap() that ignores address hints for valid, unused addresses.
        freePages(ret, tryLen);
        ret = systemAllocPages(addr, len);
        if (ret == addr)
            return ret;

        // Unlikely race / collision. Do the simple thing and just start again.
        freePages(ret, len);
        addr = getRandomPageBase();
        addr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(addr) & alignBaseMask);
    }
    IMMEDIATE_CRASH();
    return 0;
}
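Both versions of allocPages rely on the same mask arithmetic: addr & alignBaseMask rounds an address down to the requested alignment (used for the random hint), while (addr + alignOffsetMask) & alignBaseMask rounds it up (used to pick an aligned block inside the oversized mapping). A minimal standalone sketch of that arithmetic, using made-up constants rather than the allocator's real ones:

#include <cassert>
#include <cstdint>

int main()
{
    const uintptr_t align = 0x200000;                // hypothetical 2 MiB alignment (power of two)
    const uintptr_t alignOffsetMask = align - 1;
    const uintptr_t alignBaseMask = ~alignOffsetMask;

    uintptr_t addr = 0x12345000;                     // hypothetical, not align-aligned

    uintptr_t down = addr & alignBaseMask;                    // round down, as done for random hints
    uintptr_t up = (addr + alignOffsetMask) & alignBaseMask;  // round up, as done inside the big mapping

    assert(down % align == 0 && up % align == 0);
    assert(down <= addr && addr <= up && up - down == align); // holds because addr was not aligned
    return 0;
}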
// Trims base to the given length and alignment. On Windows, this returns null
// on failure and frees base.
static void* trimMapping(void *base, size_t baseLen, size_t trimLen, uintptr_t align, PageAccessibilityConfiguration pageAccessibility)
{
    size_t preSlack = reinterpret_cast<uintptr_t>(base) & (align - 1);
    if (preSlack)
        preSlack = align - preSlack;
    size_t postSlack = baseLen - preSlack - trimLen;
    ASSERT(baseLen >= trimLen || preSlack || postSlack);
    ASSERT(preSlack < baseLen);
    ASSERT(postSlack < baseLen);
    void* ret = base;

#if OS(POSIX) // On POSIX we can resize the allocation run.
    (void) pageAccessibility;
    if (preSlack) {
        int res = munmap(base, preSlack);
        RELEASE_ASSERT(!res);
        ret = reinterpret_cast<char*>(base) + preSlack;
    }
    if (postSlack) {
        int res = munmap(reinterpret_cast<char*>(ret) + trimLen, postSlack);
        RELEASE_ASSERT(!res);
    }
#else // On Windows we can't resize the allocation run.
    if (preSlack || postSlack) {
        ret = reinterpret_cast<char*>(base) + preSlack;
        freePages(base, baseLen);
        ret = systemAllocPages(ret, trimLen, pageAccessibility);
    }
#endif

    return ret;
}
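The slack arithmetic in trimMapping is easier to follow with concrete numbers. The sketch below assumes a 4 KiB allocation granularity and picks illustrative sizes; it only checks that the window [base + preSlack, base + preSlack + trimLen) is aligned and that the two slack regions account for the rest of the oversized mapping:

#include <cassert>
#include <cstdint>

int main()
{
    // Hypothetical oversized mapping: the caller wanted trimLen bytes at align
    // alignment, so it mapped trimLen + (align - granularity) bytes, as allocPages does.
    const uintptr_t align = 0x10000;     // 64 KiB alignment, for illustration
    const size_t trimLen = 0x40000;      // 256 KiB actually wanted
    const size_t baseLen = 0x4f000;      // oversized mapping (granularity assumed to be 4 KiB)
    const uintptr_t base = 0x50007000;   // granularity-aligned, but not align-aligned

    size_t preSlack = base & (align - 1);    // distance below the first aligned address
    if (preSlack)
        preSlack = align - preSlack;
    size_t postSlack = baseLen - preSlack - trimLen;

    // POSIX munmap()s the two slack ranges; Windows frees everything and remaps just the window.
    assert(((base + preSlack) & (align - 1)) == 0);
    assert(preSlack + trimLen + postSlack == baseLen);
    return 0;
}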
void* allocPages(void* addr, size_t len, size_t align, PageAccessibilityConfiguration pageAccessibility)
{
    ASSERT(len >= kPageAllocationGranularity);
    ASSERT(!(len & kPageAllocationGranularityOffsetMask));
    ASSERT(align >= kPageAllocationGranularity);
    ASSERT(!(align & kPageAllocationGranularityOffsetMask));
    ASSERT(!(reinterpret_cast<uintptr_t>(addr) & kPageAllocationGranularityOffsetMask));
    uintptr_t alignOffsetMask = align - 1;
    uintptr_t alignBaseMask = ~alignOffsetMask;
    ASSERT(!(reinterpret_cast<uintptr_t>(addr) & alignOffsetMask));

    // If the client passed null as the address, choose a good one.
    if (!addr) {
        addr = getRandomPageBase();
        addr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(addr) & alignBaseMask);
    }

    // First try to force an exact-size, aligned allocation from our random base.
    for (int count = 0; count < 3; ++count) {
        void* ret = systemAllocPages(addr, len, pageAccessibility);
        if (kHintIsAdvisory || ret) {
            // If the alignment is to our liking, we're done.
            if (!(reinterpret_cast<uintptr_t>(ret) & alignOffsetMask))
                return ret;
            freePages(ret, len);
#if CPU(32BIT)
            addr = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(ret) + align) & alignBaseMask);
#endif
        } else if (!addr) { // We know we're OOM when an unhinted allocation fails.
            return nullptr;
        } else {
#if CPU(32BIT)
            addr = reinterpret_cast<char*>(addr) + align;
#endif
        }

#if !CPU(32BIT) // Keep trying random addresses on systems that have a large address space.
        addr = getRandomPageBase();
        addr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(addr) & alignBaseMask);
#endif
    }

    // Map a larger allocation so we can force alignment, but continue randomizing only on 64-bit POSIX.
    size_t tryLen = len + (align - kPageAllocationGranularity);
    RELEASE_ASSERT(tryLen >= len);
    void* ret;

    do {
        // Don't continue to burn cycles on mandatory hints (Windows).
        addr = kHintIsAdvisory ? getRandomPageBase() : nullptr;
        ret = systemAllocPages(addr, tryLen, pageAccessibility);
        // The retries are for Windows, where a race can steal our mapping on resize.
    } while (ret && !(ret = trimMapping(ret, tryLen, len, align, pageAccessibility)));

    return ret;
}
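RELEASE_ASSERT(tryLen >= len) only guards against integer overflow; what actually makes the oversized mapping sufficient is that a granularity-aligned base can sit at most align - kPageAllocationGranularity bytes below the next align boundary, which is exactly the extra space requested. A brute-force check of that claim under assumed constants (4 KiB granularity, 64 KiB alignment; not necessarily the real platform values):

#include <cassert>
#include <cstdint>

int main()
{
    const uintptr_t granularity = 0x1000;       // assumed allocation granularity
    const uintptr_t align = 0x10000;            // assumed requested alignment
    const size_t len = 0x30000;                 // arbitrary requested length
    const size_t tryLen = len + (align - granularity);

    // Try every granularity-aligned base offset within one alignment period.
    for (uintptr_t base = 0; base < align; base += granularity) {
        uintptr_t aligned = (base + (align - 1)) & ~(align - 1); // first aligned address in the mapping
        assert(aligned + len <= base + tryLen);                  // an aligned block of len bytes always fits
    }
    return 0;
}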
Example #4
0
void deleteHeap(Heap *h)
{
  freePages(h->r0copy);
  freePages(h->r2copy);
  freePages(h->r3copy);
  freePages(h->r4copy);
  freePages(h->r5copy);
  freePages(h->r6copy);
  free(h);
}
PageCache::~PageCache() {
    freePages(&mActivePages);
    freePages(&mFreePages);
}
Example #6
0
int dgemmsy_base(dgemmsyBaseArgs * args)
{
  int status = 0; // return value
  int row,col;
  size_t slice_size,ywork_size;
  int p2; // p rounded up to the next multiple of 4
  double *a_slice,*b_slice,*y_work;
  ComputeData cdata;
  TransposeData tdata;
  int slice_n,n,p;
  const double *a,*b;
  double *y;
  int lda,ldb,ldy;
  int transa,transb;
  BlockPattern_2x4_Proc pattern;
  void * pattern_arg;
  double alpha;

  if (args == 0) return -2;

  pattern = args->params.pattern;
  pattern_arg = args->params.pattern_arg;
  slice_n = args->params.slice_n;

  transa = args->transa;
  transb = args->transb;
  n = args->n;
  p = args->p;
  a = args->a;
  lda = args->lda;
  b = args->b;
  ldb = args->ldb;
  y = args->y;
  ldy = args->ldy;
  alpha = args->alpha;

  // Check dimensions
  if (slice_n < 8) return -1;
  if ( n <= 0 || p <= 0 ) return -1;
  p2 = (p+3) & 0x7FFFFFFC;

  // printf("N=%d P=%d LDA=%d LDB=%d LDY=%d  P2=%d\n",n,p,lda,ldb,ldy,p2);

  // Check other arguments
  if (a == 0 || b == 0 || y == 0 || pattern == 0) return -5;

  // Allocate memory
  slice_size = slice_n * p2 * sizeof(double); // Slice size in bytes
  ywork_size = p2 * p2 * sizeof(double); // Result size in bytes
  a_slice = (double *)allocPages(slice_size);
  b_slice = (double *)allocPages(slice_size);
  y_work = (double *)allocPages(ywork_size);
  if (a_slice == 0 || b_slice == 0 || y_work == 0) { status = -3; goto END; }
  memset(y_work,0,ywork_size);

  // Loop on all slices and accumulate products
  initComputeData(&cdata,p2,slice_n,a_slice,b_slice,y_work);
  for (row=0;row<n;row+=slice_n)
  {
    int s = slice_n;
    if (row+s > n) s = n-row; // Limit size of last slice if needed

    // 2-pack A slice
    if (transa) tpack_2(s,p,a+lda*row,lda, slice_n,p2,a_slice);
    else npack_2(s,p,a+row,lda, slice_n,p2,a_slice);

    // 4-pack B slice
    if (transb) tpack_4(s,p,b+ldb*row,ldb, slice_n,p2,b_slice);
    else npack_4(s,p,b+row,ldb, slice_n,p2,b_slice);

    pattern(pattern_arg,p2,Compute_visitor,&cdata);
  }
  cleanupComputeData(&cdata);

  // Complete result by symmetry
  initTransposeData(&tdata,p2,y_work);
  pattern(pattern_arg,p2,Transpose_visitor,&tdata);
  cleanupTransposeData(&tdata);

  // Combine and store (untransposed) result. If we are multithreading,
  // we must protect the update with a mutex, since all threads
  // will update the same Y.
  if (args->yMutex != 0)
    {
      int locked = pthread_mutex_lock(args->yMutex);
      if (locked != 0) status = -4;
    }
  for (col=0;col<p;col++)
    {
      double * yy = y+ldy*col;
      double * yy_work = y_work+p2*col;
#if USE_AXPY
      cblas_daxpy(p,alpha,yy_work,1,yy,1);
#else
      if (alpha == 1)
	{
	  for (row=0;row<p;row++) yy[row] += yy_work[row];
	}
      else
	{
	  for (row=0;row<p;row++) yy[row] += alpha * yy_work[row];
	}
#endif
    }
  if (args->yMutex != 0)
    {
      int unlocked = pthread_mutex_unlock(args->yMutex);
      if (unlocked != 0) status = -4;
    }

END:
  // Cleanup
  freePages(y_work);
  freePages(a_slice);
  freePages(b_slice);
  return status;
}
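Two bits of arithmetic in dgemmsy_base are worth a quick check: p2 = (p+3) & 0x7FFFFFFC rounds p up to the next multiple of 4 (the mask clears the two low bits and keeps the result non-negative), and the slice loop clamps the final slice so that exactly n rows get packed in total. A small standalone verification with arbitrarily chosen sizes:

#include <cassert>

int main()
{
    // p2 = (p+3) & 0x7FFFFFFC rounds p up to the next multiple of 4.
    for (int p = 1; p <= 16; ++p) {
        int p2 = (p + 3) & 0x7FFFFFFC;
        assert(p2 % 4 == 0 && p2 >= p && p2 - p < 4);
    }

    // The slice loop walks n rows in chunks of slice_n, shortening only the last chunk.
    int n = 100, slice_n = 32, covered = 0;
    for (int row = 0; row < n; row += slice_n) {
        int s = slice_n;
        if (row + s > n)
            s = n - row;  // last slice has 4 rows here
        covered += s;
    }
    assert(covered == n);
    return 0;
}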