void Slave::renderFrame(Renderer *tiledRenderer, 
                              FrameBuffer *fb,
                              const uint32 channelFlags
                              )
      {
        Ref<RenderTask> renderTask
          = new RenderTask;//(fb,tiledRenderer->createRenderJob(fb));
        renderTask->fb = fb;
        renderTask->renderer = tiledRenderer;
        renderTask->numTiles_x = divRoundUp(fb->size.x,TILE_SIZE);
        renderTask->numTiles_y = divRoundUp(fb->size.y,TILE_SIZE);
        renderTask->channelFlags = channelFlags;
        tiledRenderer->beginFrame(fb);

        /*! iw: using a local sync event for now; "in theory" we should be
          able to attach something like a sync event to the frame
          buffer, just trigger the task here, and let somebody else sync
          on the framebuffer once it is needed; alas, I'm currently
          running into some issues with the embree taks system when
          trying to do so, and thus am reverting to this
          fully-synchronous version for now */
        
        // renderTask->fb->frameIsReadyEvent = TaskScheduler::EventSync();
        TaskScheduler::EventSync sync;
        renderTask->task = embree::TaskScheduler::Task
          (&sync,
           // (&renderTask->fb->frameIsReadyEvent,
           renderTask->_run,renderTask.ptr,
           renderTask->numTiles_x*renderTask->numTiles_y,
           renderTask->_finish,renderTask.ptr,
           "LocalTiledLoadBalancer::RenderTask");
        TaskScheduler::addTask(-1, TaskScheduler::GLOBAL_BACK, &renderTask->task); 
        sync.sync();
      }
Example #2
0
static void
calcNrThreads(
    size_t threads[2],
    const SubproblemDim *subdims,
    const PGranularity *pgran,
    const void *args,
    const void *extra)
{
    size_t yLen;     /* Length of "Y" vector */
    const CLBlasKargs *kargs = args;
    unsigned int subgr = pgran->wgSize[0] / (subdims[0].bwidth / subdims[1].bwidth);

    (void)subdims;
    (void)extra;

    yLen = kargs->transA == clblasNoTrans ? kargs->M : kargs->N;

    if (yLen == 0) {
        yLen = 1;
        //launch one group to avoid CL_INVALID_WORK_GROUP_SIZE error
    }

    //each work item handles y1 lines
    threads[0] = divRoundUp(yLen, subdims[1].y) * subgr;
    threads[0] = roundUp(threads[0], pgran->wgSize[0]);
    threads[1] = 0;
}
Example #3
0
unsigned int
tileVectorsNum(const Tile *tile)
{
    size_t pitch, height;

    pitch = tilePitch(tile);
    height = (tile->trans) ? tile->nrCols : tile->nrRows;

    return (unsigned int)divRoundUp(height * pitch, tile->vecLen);
}
      void Master::renderFrame(Renderer *tiledRenderer,
                               FrameBuffer *fb,
                               const uint32 channelFlags)
      {
        int rc; 
        MPI_Status status;

        // mpidevice already sent the 'cmd_render_frame' event; we
        // only have to wait for tiles...
        const size_t numTiles
          = divRoundUp(fb->size.x,TILE_SIZE)
          * divRoundUp(fb->size.y,TILE_SIZE);
        
        assert(fb->colorBufferFormat == OSP_RGBA_I8);
        uint32 rgba_i8[TILE_SIZE][TILE_SIZE];
        for (int i=0;i<numTiles;i++) {
          box2ui region;
          // printf("#m: receiving tile %i\n",i);
          rc = MPI_Recv(&region,4,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,
                        mpi::worker.comm,&status); 
          Assert(rc == MPI_SUCCESS); 
          // printf("#m: received tile %i (%i,%i) from %i\n",i,
          //        tile.region.lower.x,tile.region.lower.y,status.MPI_SOURCE);
          rc = MPI_Recv(&rgba_i8[0],TILE_SIZE*TILE_SIZE,MPI_INT,
                        status.MPI_SOURCE,status.MPI_TAG,mpi::worker.comm,&status);
          Assert(rc == MPI_SUCCESS);

          ospray::LocalFrameBuffer *lfb = (ospray::LocalFrameBuffer *)fb;
          for (int iy=region.lower.y;iy<region.upper.y;iy++)
            for (int ix=region.lower.x;ix<region.upper.x;ix++) {
              ((uint32*)lfb->colorBuffer)[ix+iy*lfb->size.x] 
                = rgba_i8[iy-region.lower.y][ix-region.lower.x];
            }
        }
        //        printf("#m: master done fb %lx\n",fb);
      }
Example #5
0
bool
FileHeader::Allocate(PersistentBitmap *freeMap, int fileSize)
{ 
    numBytes = fileSize;
	numSectors  = divRoundUp(fileSize, kernel->diskSectorSize);
    if (freeMap->NumClear() < numSectors)
		return FALSE;		// not enough space

    for (int i = 0; i < numSectors; i++) {
		dataSectors[i] = freeMap->FindAndSet();
		// since we checked that there was enough free space,
		// we expect this to succeed
		ASSERT(dataSectors[i] >= 0);
    }
    return TRUE;
}
Example #6
0
static void
calcNrThreads(
    size_t threads[2],
    const SubproblemDim *subdims,
    const PGranularity *pgran,
    const void *args,
    const void *extra)
{
    const CLBlasKargs *kargs = args;
    unsigned int subgr = subdims[0].bwidth / subdims[1].bwidth;
    (void)extra;

    //each work item handles y1 lines
    threads[0] = divRoundUp(kargs->N, subdims[1].y) * subgr;
    threads[0] = roundUp(threads[0], pgran->wgSize[0]);
    threads[1] = 0;
}
Example #7
0
void LocalFrameBuffer::clear(const uint32 fbChannelFlags)
{
    if (fbChannelFlags & OSP_FB_ACCUM) {
        // it is only necessary to reset the accumID,
        // LocalFrameBuffer_accumulateTile takes care of clearing the
        // accumulation buffers
        memset(tileAccumID, 0, tiles*sizeof(int32));

        // always also clear error buffer (if present)
        if (hasVarianceBuffer) {
            for (int i = 0; i < tiles; i++)
                tileErrorBuffer[i] = inf;

            errorRegion.clear();
            // initially create one region covering the complete image
            errorRegion.push_back(box2i(vec2i(0), vec2i(tilesx, divRoundUp(size.y, TILE_SIZE))));
        }
    }
}
Example #8
0
 template <typename T> T roundUpToMultiple(T value, T multiplier)
 {
     return divRoundUp(value, multiplier) * multiplier;
 }
uint32_t crearSegmento(uint32_t pid, size_t size, t_msg_id* id) {

	uint16_t numSegmento;
	uint16_t cantPaginas = divRoundUp(size,PAG_SIZE);

	if (size > SEG_MAX_SIZE) {
		*id = INVALID_SEG_SIZE;
		return 0;
	}

	if (CantPaginasDisponibles >= cantPaginas) {

		int pag;
		t_segmento *tablaLocal;
		char *stringPID = string_uitoa(pid);

		if ((tablaLocal = tablaDelProceso(stringPID)) == NULL)	{
			dictionary_put(TablaSegmentosGlobal, stringPID, tablaLocal = malloc(NUM_SEG_MAX * sizeof(t_segmento)));			/* Creo la tabla local del proceso PID */

			for (numSegmento = 0; numSegmento < NUM_SEG_MAX; ++numSegmento)													/* Inicializo la Tabla Local de Segmentos */
				tablaLocal[numSegmento].limite = 0;
		}

		if ((numSegmento = primerEntradaLibre(tablaLocal)) == NUM_SEG_MAX) {
			*id = MAX_SEG_NUM_REACHED;
			return 0;
		}

		CantPaginasDisponibles -= cantPaginas;
		tablaLocal[numSegmento].limite = size;
		tablaLocal[numSegmento].bytesOcupados = 0;
		tablaLocal[numSegmento].tablaPaginas = malloc(cantPaginas * sizeof(t_pagina));

		/* Marco las páginas como NO en Memoria Principal */
		for (pag = 0; pag < cantPaginas && CantPaginasEnSwapDisponibles; ++pag, --CantPaginasEnSwapDisponibles)
			paginaEnMemoria(tablaLocal, numSegmento, pag) = false;

		if (CantPaginasEnSwapDisponibles == 0) {
			pthread_mutex_lock(&LogMutex);
			log_trace(Logger, "Espacio de intercambio llleno.");
			pthread_mutex_unlock(&LogMutex);
		}

		/* Si la SWAP se llena reservo marcos de Memoria Principal para el resto de las páginas */
		for (; pag < cantPaginas; ++pag, --CantPaginasEnMemoriaDisponibles) {

			MemoriaPrincipal[marcoDePagina(tablaLocal, numSegmento, pag) = marcoVacio()].ocupado = true;
			MemoriaPrincipal[marcoDePagina(tablaLocal, numSegmento, pag)].pid = pid;
			paginaEnMemoria(tablaLocal, numSegmento, pag) = true;
			AgregarPaginaAEstructuraSustitucion(pid, numSegmento, pag);

			pthread_mutex_lock(&LogMutex);
			log_trace(Logger, "Marco %u asignado al proceso %s.", marcoDePagina(tablaLocal, numSegmento, pag), stringPID);
			pthread_mutex_unlock(&LogMutex);
		}

		free(stringPID);

		if (CantPaginasEnMemoriaDisponibles == 0) {
			pthread_mutex_lock(&LogMutex);
			log_trace(Logger, "Memoria Principal lllena.");
			pthread_mutex_unlock(&LogMutex);
		}

	}
	else {
		*id = FULL_MEMORY;
		return 0;
	}

	*id = OK_CREATE;
	return generarDireccionLogica(numSegmento,0,0);
}
Example #10
0
LocalFrameBuffer::LocalFrameBuffer(const vec2i &size,
                                   ColorBufferFormat colorBufferFormat,
                                   bool hasDepthBuffer,
                                   bool hasAccumBuffer,
                                   bool hasVarianceBuffer,
                                   void *colorBufferToUse)
    : FrameBuffer(size, colorBufferFormat, hasDepthBuffer, hasAccumBuffer, hasVarianceBuffer)
{
    Assert(size.x > 0);
    Assert(size.y > 0);
    if (colorBufferToUse)
        colorBuffer = colorBufferToUse;
    else {
        switch (colorBufferFormat) {
        case OSP_FB_NONE:
            colorBuffer = NULL;
            break;
        case OSP_FB_RGBA8:
        case OSP_FB_SRGBA:
            colorBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
            break;
        case OSP_FB_RGBA32F:
            colorBuffer = (uint32*)alignedMalloc(sizeof(uint32)*size.x*size.y);
            break;
        default:
            throw std::runtime_error("color buffer format not supported");
        }
    }

    if (hasDepthBuffer)
        depthBuffer = (float*)alignedMalloc(sizeof(float)*size.x*size.y);
    else
        depthBuffer = NULL;

    if (hasAccumBuffer)
        accumBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
    else
        accumBuffer = NULL;

    tilesx = divRoundUp(size.x, TILE_SIZE);
    tiles = tilesx * divRoundUp(size.y, TILE_SIZE);
    tileAccumID = new int32[tiles];
    memset(tileAccumID, 0, tiles*sizeof(int32));

    if (hasVarianceBuffer) {
        varianceBuffer = (vec4f*)alignedMalloc(sizeof(vec4f)*size.x*size.y);
        tileErrorBuffer = new float[tiles];
        // maximum number of regions: all regions are of size 3 are split in half
        errorRegion.reserve(divRoundUp(tiles*2, 3));
    } else {
        varianceBuffer = NULL;
        tileErrorBuffer = NULL;
    }

    ispcEquivalent = ispc::LocalFrameBuffer_create(this,size.x,size.y,
                     colorBufferFormat,
                     colorBuffer,
                     depthBuffer,
                     accumBuffer,
                     varianceBuffer,
                     tileAccumID,
                     tileErrorBuffer);
}
Example #11
0
static int
genLoopsK(
    struct KgenContext *ctx,
    BlasGenSettings *gset,
    TileMulOpts *mulOpts,
    char *tmp)
{
    KernelExtraFlags kflags = gset->kextra->flags;
    const size_t y0 = gset->subdims[0].y;
    const size_t bwidth = gset->subdims[1].bwidth;
    int ret;
    bool isRel = false;
    const char *inTypeNameA, *inPtrNameA, *inTypeNameB, *inPtrNameB;

    getVectorTypeName(gset->kextra->dtype, gset->kextra->vecLenA, &inTypeNameA, &inPtrNameA);
    getVectorTypeName(gset->kextra->dtype, gset->kextra->vecLenB, &inTypeNameB, &inPtrNameB);

    sprintf(tmp, "uint k0;\n");
    kgenAddStmt(ctx, tmp);

    if (!(kflags & (KEXTRA_TAILS_M_LOWER | KEXTRA_TAILS_N_LOWER |
                    KEXTRA_TAILS_K_LOWER))) {

        FetchAddrMode addrMode = FETCH_ADDR_A_RELATIVE | FETCH_ADDR_B_RELATIVE |
                                 FETCH_ADDR_K_RELATIVE;

        isRel = true;

        mulOpts->fctx = createFetchContext();
        if (mulOpts->fctx == NULL) {
            return -ENOMEM;
        }
        setFetchAddrMode(mulOpts->fctx, addrMode);

        gset->varNames.A = "pA";
        gset->varNames.B = "pB";
    }
    else {
        gset->flags |= BGF_UPTRS;
        kgenPrintf(ctx, "GPtr Ag, Bg;\n"
                        "\n"
                        "Ag.%s = A;\n"
                        "Bg.%s = B;\n\n",
                   inPtrNameA, inPtrNameB);
    }

    if (isMatrixUpper(kflags)) {
        if (isRel) {
            switch ((((gset->kextra->flags & KEXTRA_TRANS_A) != 0)<<1) |
                    (((gset->kextra->flags & KEXTRA_UPPER_TRIANG) != 0) ^
                     ((gset->kextra->flags & KEXTRA_COLUMN_MAJOR) != 0))
                   ) {
            case 0:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.z, lda, coord.y)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.x, ldb, coord.z)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            case 1:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.y, lda, coord.z)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.z, ldb, coord.x)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            case 2:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.z, lda, coord.y)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.z, ldb, coord.x)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            case 3:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.y, lda, coord.z)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.x, ldb, coord.z)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            }
        }

        sprintf(tmp,
            "for (k0 = kBegin; "
                "(k0 <= (kBegin + %luu))&&(k0 < M); "
                "k0 += %lu)",
            y0,
            bwidth);
        kgenBeginBranch(ctx, tmp);

        kgenPrintf( ctx,
            "coord.z = k0;\n");

        mulOpts->postFetch = genTrxmPostFetchZero;
        ret = tileMulGen(ctx, gset, mulOpts);
        if (ret != 0) {
            return ret;
        }
        kgenEndBranch(ctx, NULL);

        //main triangle part
        sprintf(tmp,
            "for (; k0 <= max(0, (int)M - %lu); k0 += %lu)",
            y0,
            gset->subdims[1].bwidth);

        kgenBeginBranch(ctx, tmp);

        mulOpts->postFetch = NULL;
        ret = tileMulGen(ctx, gset, mulOpts);
        if (ret != 0) {
            return ret;
        }
        kgenEndBranch(ctx, NULL);

        // matrix side part
        // should be calculated by item0 of each subgroup
        sprintf(tmp, "for (; k0 < M; k0 += %lu)", bwidth);
        kgenBeginBranch(ctx, tmp);

        kgenPrintf( ctx,
            "coord.z = k0;\n");

        resetFetchNumA(mulOpts);
        mulOpts->postFetch = genTrxmPostFetchZero;
        ret = tileMulGen(ctx, gset, mulOpts);
        if (ret != 0) {
            return ret;
        }
        kgenEndBranch(ctx, NULL);

    }
    else {
        // lower
        size_t diagBlocks; //Number of bw *y blocks that fit in y*y square

        if (isRel) {
            switch ((((gset->kextra->flags & KEXTRA_TRANS_A) != 0)<<1) |
                    (((gset->kextra->flags & KEXTRA_UPPER_TRIANG) != 0) ^
                     ((gset->kextra->flags & KEXTRA_COLUMN_MAJOR) != 0))
                   ) {
            case 0:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.y, lda, coord.z)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.z, ldb, coord.x)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            case 1:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.z, lda, coord.y)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.x, ldb, coord.z)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            case 2:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.y, lda, coord.z)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.x, ldb, coord.z)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            case 3:
                kgenPrintf(ctx,
                    "__global %s *pA = (__global %s *)&A[mad24(coord.z, lda, coord.y)];\n"
                    "__global %s *pB = (__global %s *)&B[mad24(coord.z, ldb, coord.x)];\n",
                    inTypeNameA, inTypeNameA,inTypeNameB, inTypeNameB);
                break;
            }
        }

        diagBlocks = divRoundUp(y0, bwidth);
        sprintf(tmp, "uint iterK = min(currM + %luu, M);\n", y0);
        kgenAddStmt(ctx, tmp);
        sprintf(tmp, "iterK = (iterK + %lu) / %lu;\n", bwidth - 1, bwidth);
        kgenAddStmt(ctx, tmp);

        // main triangle part
        sprintf(tmp, "for (k0 = 0; k0 < max(0, (int)iterK - %lu); k0++)",
                diagBlocks);
        kgenBeginBranch(ctx, tmp);
        mulOpts->postFetch = NULL;
        // part without diagonal elements post fetch zeroing
        ret = tileMulGen(ctx, gset, mulOpts);
        if (ret != 0) {
            return ret;
        }
        kgenEndBranch(ctx, NULL);

        // diagonal part
        sprintf(tmp, "for (; k0 < iterK; k0++)");
        kgenBeginBranch(ctx, tmp);

        kgenPrintf( ctx,
            "coord.z = k0 * %lu;\n",
            bwidth);

        // diagonal blocks part
        mulOpts->postFetch = genTrxmPostFetchZero;
        resetFetchNumA(mulOpts);
        ret = tileMulGen(ctx, gset, mulOpts);
        if (ret != 0) {
            return ret;
        }
        kgenEndBranch(ctx, NULL);
    }

    if (isRel) {
        destroyFetchContext(mulOpts->fctx);
        mulOpts->fctx = NULL;
    }

    return 0;
}