/*
 * Function: updo_trsm_starpu_common
 *
 * Calls SOPALIN_TRSM with side "L" and uplo "L", passing the column block
 * (leading dimension taken from buffer 0) and the right-hand-side block
 * (leading dimension taken from buffer 1).
 *
 * Parameters:
 *   buffers    - Data handlers :
 *     0            - L column block
 *     1            - Right-hand-side block facing the column block.
 *   _args      - Codelet arguments (starpu_updo_trsm_data_t):
 *     sopalin_data - global PaStiX internal data.
 *     cblknum      - Current column block index.
 *     transpose    - Character forwarded as the transposition flag.
 *     diag         - Character forwarded as the diagonal flag.
 *   arch       - Type of architecture : only ARCH_CPU is implemented,
 *                ARCH_CUDA and any other value report an error.
 */
static inline
void updo_trsm_starpu_common(void * buffers[], void * _args, int arch)
{
  starpu_updo_trsm_data_t *args = (starpu_updo_trsm_data_t*)_args;

  /* NOTE(review): datacode is never named explicitly below; presumably the
   * CBLK_ and UPDOWN_ accessor macros read it - confirm before renaming. */
  Sopalin_Data_t *sopalin_data = args->sopalin_data;
  SolverMatrix   *datacode     = sopalin_data->datacode;

  /* Column block: base pointer and leading dimension */
  PASTIX_FLOAT *L      = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]);
  PASTIX_INT    stride = STARPU_MATRIX_GET_LD(buffers[0]);

  /* Right-hand-side block: base pointer, leading dimension, NY extent */
  PASTIX_FLOAT *RHS    = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]);
  PASTIX_INT    rhssze = STARPU_MATRIX_GET_LD(buffers[1]);
  PASTIX_INT    rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]);

  /* Codelet parameters */
  PASTIX_INT    cblknum   = args->cblknum;
  PASTIX_INT    colnbr    = CBLK_COLNBR(cblknum);
  char         *transpose = &(args->transpose);
  char         *diag      = &(args->diag);
  PASTIX_FLOAT  fun       = 1.0;

  ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN);
  ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN);

  if (arch != ARCH_CPU)
    {
      /* ARCH_CUDA shares the error path with unrecognised values */
      errorPrint("Unknown Architecture");
      assert(0);
      return;
    }

  SOPALIN_TRSM("L","L",transpose,diag,colnbr,rhsnbr,fun,L,stride,RHS,rhssze);
}
Пример #2
0
/*
 * Function: wait_contrib_comp_2d
 *
 * Blocks the calling thread until both TASK_BTAGPTR(i) and RTASK_COEFTAB(i)
 * are non-null for task i.
 *
 * Depending on the build flags and run-time thread mode, the function first
 * polls for incoming data itself (rcsd_testall_fab or RECV_ONE_BLOCK), then
 * emits a STATE_WAITLOC trace event and finally sleeps on the condition
 * variable of the task returned by TASK_MASTER(i).
 *
 * Parameters:
 *   sopalin_data - global PaStiX internal data.
 *   me           - Index of the calling thread (indexes thread_data).
 *   i            - Index of the task whose contributions are awaited.
 */
static inline
void API_CALL(wait_contrib_comp_2d)(Sopalin_Data_t *sopalin_data,
                                    PASTIX_INT me, PASTIX_INT i){

  SolverMatrix  *datacode    = sopalin_data->datacode;
#ifdef TRACE_SOPALIN
  /* Only needed for the trace_begin_task call below */
  Thread_Data_t *thread_data = sopalin_data->thread_data[me];
#endif
#ifdef SMP_SOPALIN
  /* NOTE(review): firsttask is declared only under SMP_SOPALIN but is used
   * unconditionally further down; presumably ASSERTDBG / MUTEX_* / COND_WAIT
   * expand to nothing in non-SMP builds - confirm. */
  PASTIX_INT            firsttask   = TASK_MASTER(i);
#endif

  /* Wait for the local and MPI contributions */
#if (defined FORCE_CONSO)
  if (THREAD_FUNNELED_OFF)
    {
      /* Poll with rcsd_testall_fab until both pointers are set */
      while ((!(TASK_BTAGPTR(i)))
             || (!(RTASK_COEFTAB(i))))
        {
          API_CALL(rcsd_testall_fab)(sopalin_data, me);
        }
    }
  else
#endif
    {
      if (THREAD_COMM_OFF)
        {
          /* Receive blocks while the block target is still missing and
           * the task mark is positive */
          while ((TASK_BTAGPTR(i) == NULL) && (sopalin_data->taskmark[i] > 0))
            {
              ASSERTDBG(i == firsttask, MOD_SOPALIN);
              RECV_ONE_BLOCK;
            }
        }
    }

  /* Record the start of the local-wait state for this task */
  trace_begin_task(thread_data->tracefile,
                   SOPALIN_CLOCK_TRACE, SOLV_PROCNUM, me, 1,
                   STATE_WAITLOC, i);

#if (DBG_PASTIX_DYNSCHED > 0)
  /* Debug bookkeeping: under the task's own mutex, check that taskmark
   * matches the task type (0 for E2, 1 for E1) and then advance it.
   * NOTE(review): the two non-NULL assertions run before the condition
   * wait below that waits for those same pointers - confirm intent. */
  MUTEX_LOCK(&(sopalin_data->mutex_task[i]));
  ASSERTDBG(((TASK_TASKID(i) == E2) && (sopalin_data->taskmark[i] == 0))
            || ((TASK_TASKID(i) == E1) &&
                (sopalin_data->taskmark[i] == 1)), MOD_SOPALIN);
  ASSERTDBG(TASK_BTAGPTR(i)  != NULL, MOD_SOPALIN);
  ASSERTDBG(RTASK_COEFTAB(i) != NULL, MOD_SOPALIN);
  sopalin_data->taskmark[i]++;
  MUTEX_UNLOCK(&(sopalin_data->mutex_task[i]));
#endif

  /* Sleep on the master task's condition variable until both pointers are
   * set. The COND_WAIT statement is the (unbraced) body of the while loop;
   * the predicate is re-tested after every wake-up. */
  MUTEX_LOCK(&(sopalin_data->mutex_task[firsttask]));
  while ((!(TASK_BTAGPTR(i)))
         || (!(RTASK_COEFTAB(i))))
  COND_WAIT(&(sopalin_data->cond_task[firsttask]),
            &(sopalin_data->mutex_task[firsttask]));
  MUTEX_UNLOCK(&(sopalin_data->mutex_task[firsttask]));
}
/*
 * Function: updo_diag_starpu_common
 *
 * Divide the right-hand-side(s) by the diagonal
 *
 * Parameters:
 *   buffers    - Data handlers :
 *     0            - L column block
 *     1            - Right-hand-side block facing the column block.
 *   _args      - Codelet arguments:
 *     sopalin_data - global PaStiX internal data.
 *     cblknum      - Current column block index.
 *   arch       - Type of architecture : ARCH_CPU | ARCH_CUDA
 */
static inline
void updo_diag_starpu_common(void * buffers[], void * _args, int arch)
{
  starpu_updo_diag_data_t * args         = (starpu_updo_diag_data_t*)_args;
  Sopalin_Data_t          * sopalin_data = args->sopalin_data;
  SolverMatrix            * datacode     = sopalin_data->datacode;
  PASTIX_FLOAT                   * L            = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]);
  PASTIX_FLOAT                   * RHS          = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]);
  PASTIX_INT                       stride       = STARPU_MATRIX_GET_LD(buffers[0]);
  PASTIX_INT                       rhsnbr       = STARPU_MATRIX_GET_NY(buffers[1]);
  PASTIX_INT                       rhssze       = STARPU_MATRIX_GET_LD(buffers[1]);
  PASTIX_INT                       cblknum      = args->cblknum;
  PASTIX_INT                       colnbr       = CBLK_COLNBR(cblknum);

  ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN);
  ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN);

  switch(arch) {
  case ARCH_CPU:
  {
    PASTIX_INT i, j;
    PASTIX_FLOAT * myRHS = RHS;
    for (j = 0; j < rhsnbr; j++)
      {
        for (i = 0; i < colnbr; i++)
          {
            myRHS[i] /= L[i*(stride+1)];
          }
        myRHS += rhssze;
      }
    break;
  }
  case ARCH_CUDA:
  default:
    errorPrint("Unknown Architecture");
    assert(0);
    break;
  }
}
/*
 * Function: updo_up_gemm_starpu_common
 *
 * Parameters:
 *   buffers    - Data handlers :
 *     1            - L column block
 *     2            - Right-hand-side block facing the column block.
 *   _args      - Codelet arguments:
 *     sopalin_data - global PaStiX internal data.
 *     cblknum      - Current column block index.
 */
static inline
void updo_up_gemm_starpu_common(void * buffers[], void * _args, int arch)
{
  starpu_updo_gemm_data_t * args         = (starpu_updo_gemm_data_t*)_args;
  Sopalin_Data_t          * sopalin_data = args->sopalin_data;
  SolverMatrix            * datacode     = sopalin_data->datacode;
  PASTIX_FLOAT                   * L            = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]);
  PASTIX_FLOAT                   * RHS          = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]);
  PASTIX_FLOAT                   * RHS2         = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[2]);
  PASTIX_INT                       stride       = STARPU_MATRIX_GET_LD(buffers[0]);
  PASTIX_INT                       rhsnbr       = STARPU_MATRIX_GET_NY(buffers[1]);
  PASTIX_INT                       rhssze       = STARPU_MATRIX_GET_LD(buffers[1]);
  PASTIX_INT                       cblknum      = args->cblknum;
  PASTIX_INT                       bloknum      = args->bloknum;
  char                    * transpose    = &(args->transpose);
  PASTIX_INT                       fcblknum     = SYMB_CBLKNUM(bloknum);
  PASTIX_INT                       colnbr       = CBLK_COLNBR(cblknum);
  PASTIX_INT                       rownbr       = BLOK_ROWNBR(bloknum);
  PASTIX_FLOAT                     fun          = 1.0;
  PASTIX_FLOAT                   * ga           = L + SOLV_COEFIND(bloknum);
  PASTIX_FLOAT                   * gc           = RHS2 +
    SYMB_FROWNUM(bloknum) - SYMB_FCOLNUM(fcblknum);

  ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN);
  ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN);

  switch(arch) {
  case ARCH_CPU:
    SOPALIN_GEMM(transpose,"N",colnbr,rhsnbr,rownbr,-fun,ga,stride,
                 gc,rhssze,fun,RHS, UPDOWN_SM2XSZE);
    break;
  case ARCH_CUDA:
  default:
    errorPrint("Unknown Architecture");
    assert(0);
    break;
  }
}
Пример #5
0
/*
 * Function: wait_contrib_comp_1d
 *
 * Blocks the calling thread until TASK_CTRBCNT(i) is zero for task i.
 *
 * Depending on the build flags and run-time thread mode, the function first
 * polls for incoming data itself (rcsd_testall_fab or RECV_ONE_FANIN), then
 * emits a STATE_WAITLOC trace event and finally sleeps on the task's
 * condition variable until the counter reaches zero.
 *
 * Parameters:
 *   sopalin_data - global PaStiX internal data.
 *   me           - Index of the calling thread (indexes thread_data).
 *   i            - Index of the task whose contributions are awaited.
 */
static inline void API_CALL(wait_contrib_comp_1d)(Sopalin_Data_t *sopalin_data, PASTIX_INT me, PASTIX_INT i){

  SolverMatrix  *datacode    = sopalin_data->datacode;
#ifdef TRACE_SOPALIN
  /* Only needed for the trace_begin_task call below */
  Thread_Data_t *thread_data = sopalin_data->thread_data[me];
#endif

#if (defined FORCE_CONSO)
  if (THREAD_FUNNELED_OFF)
    {
      /* Wait in "multiple" mode with force_conso */
      while(TASK_CTRBCNT(i))
        {
          API_CALL(rcsd_testall_fab)(sopalin_data, me);
        }
    }
  else
#endif
    {
      if (THREAD_COMM_OFF)
        {
          /* Wait in "multiple" mode without force_conso */
          while(TASK_FTGTCNT(i))
            {
              RECV_ONE_FANIN;
            }
        }
    }
  /* Record the start of the local-wait state for this task */
  trace_begin_task(thread_data->tracefile,
                   SOPALIN_CLOCK_TRACE, SOLV_PROCNUM, me, 1,
                   STATE_WAITLOC, i);

  MUTEX_LOCK(&(sopalin_data->mutex_task[i]));
#if (DBG_PASTIX_DYNSCHED > 0)
  /* Debug bookkeeping: the task must be unmarked on entry; mark it */
  ASSERTDBG(sopalin_data->taskmark[i] == 0, MOD_SOPALIN);
  sopalin_data->taskmark[i]++;
#endif
  /* Sleep until the contribution counter drops to zero; the predicate is
   * re-tested after every wake-up. */
  while (TASK_CTRBCNT(i))
  {
    COND_WAIT(&(sopalin_data->cond_task[i]), &(sopalin_data->mutex_task[i]));
  }
  MUTEX_UNLOCK(&(sopalin_data->mutex_task[i]));

}
Пример #6
0
BOOLEAN
PEFile::RtlGetExports(
)
/*++

Routine Description:

    Walks the export directory of the image held in m_Image and appends one
    EXPORT_INFO entry per named export to m_Exports. For each entry the
    routine records the name index, the ordinal-table value, the function
    RVA, whether that RVA lies outside the image (IsTablePatched) and the
    result of IsPointerHooked on the resolved address.

    m_NumberOfExportedFunctions is set to the directory's NumberOfNames and
    m_NumberOfHookedAPIs to the number of entries flagged as patched or
    hooked.

Arguments:

    None.

Return Value:

    TRUE if the export directory was found and walked. FALSE if the image is
    not initialized, has no export directory, or the directory's table RVAs
    fall outside the directory.

--*/
{
    BOOLEAN Result = FALSE;
    PIMAGE_EXPORT_DIRECTORY ExportDir = NULL;

    //
    // Every local that carries an initializer is declared ahead of the first
    // 'goto CleanUp' so that no jump bypasses an initialization.
    //
    PUCHAR Image = NULL;
    ULONG NumberOfHookedAPIs = 0;

    ULONG DirRva, DirSize, DirEnd;

    PULONG AddressOfNames;
    PUSHORT AddressOfNameOrdinals;
    PULONG AddressOfFunctions;

    UINT i;

    ASSERTDBG(m_Image.Initialized);
    if (!m_Image.Initialized) goto CleanUp;

    DirRva = m_Image.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
    DirSize = m_Image.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].Size;

    if (!DirRva || !DirSize) goto CleanUp;
    if (m_ImageSize && (DirRva > m_ImageSize)) goto CleanUp;

    //
    // Reject directories whose end wraps around the 32-bit RVA space; every
    // bound check below compares against DirEnd.
    //
    DirEnd = DirRva + DirSize;
    if (DirEnd < DirRva) goto CleanUp;

    Image = ((PUCHAR)m_Image.Image);

    ExportDir = (PIMAGE_EXPORT_DIRECTORY)(Image + DirRva);

    if ((ExportDir->AddressOfNames >= DirEnd) ||
        (ExportDir->AddressOfNameOrdinals >= DirEnd) ||
        (ExportDir->AddressOfFunctions >= DirEnd))
    {
        goto CleanUp;
    }

    AddressOfNames = (PULONG)(Image + (ULONG)ExportDir->AddressOfNames);
    AddressOfNameOrdinals = (PUSHORT)(Image + (ULONG)ExportDir->AddressOfNameOrdinals);
    AddressOfFunctions = (PULONG)(Image + (ULONG)ExportDir->AddressOfFunctions);

#if VERBOSE_MODE
    g_Ext->Dml("(%s) ExportDir->NumberOfName: %d, ExportDir->NumberOfFunctions: %d\n",
               m_PdbInfo.PdbName, ExportDir->NumberOfNames, ExportDir->NumberOfFunctions);
#endif

    m_NumberOfExportedFunctions = ExportDir->NumberOfNames;

    //
    // At most 5000 names are walked, whatever the directory claims.
    //
    for (i = 0; i < ExportDir->NumberOfNames && i < 5000; i += 1)
    {
        EXPORT_INFO ExportInfo = { 0 };
        USHORT Ordinal = AddressOfNameOrdinals[i];
        ULONG NameRva;
        ULONG Len = 0;

        //
        // The name-ordinal entry is an index into the function address
        // table, so it is bounded by NumberOfFunctions (not NumberOfNames).
        //
        if (Ordinal >= ExportDir->NumberOfFunctions) continue;

        ExportInfo.Address = AddressOfFunctions[Ordinal];

        ExportInfo.Index = i;
        ExportInfo.Ordinal = Ordinal;
        ExportInfo.IsTablePatched = (ExportInfo.Address >= m_ImageSize) ? TRUE : FALSE;
        ExportInfo.IsHooked = IsPointerHooked(m_ImageBase + ExportInfo.Address);
        if (ExportInfo.IsTablePatched || ExportInfo.IsHooked) NumberOfHookedAPIs++;

        //
        // Validate the name RVA before touching the string it points to.
        //
        NameRva = AddressOfNames[i];
        if (NameRva < DirEnd)
        {
            Len = (ULONG)strnlen_s((LPSTR)(Image + NameRva), sizeof(ExportInfo.Name) - 1);
        }

        if (Len)
        {
            //
            // ExportInfo is zero-initialized and Len is at most
            // sizeof(Name) - 1, so the copied name stays NUL-terminated.
            //
            memcpy_s(ExportInfo.Name, sizeof(ExportInfo.Name), (LPSTR)(Image + NameRva), Len);
        }
        else
        {
            strcpy_s(ExportInfo.Name, sizeof(ExportInfo.Name), "*unreadable*");
        }

        m_Exports.push_back(ExportInfo);
    }

    m_NumberOfHookedAPIs = NumberOfHookedAPIs;

    Result = TRUE;

CleanUp:
    return Result;
}
Пример #7
0
/*
 * Function: factor_diag
 *
 * Factorization of diagonal block
 *
 * Locates the first block of column block c inside its coefficient array
 * and, when COMPUTE is defined, hands it to the block factorization routine
 * selected at compile time:
 *   - CHOL_SOPALIN with SOPALIN_LU : PASTIX_getrf_block, then DimTrans
 *                                    from ga to gb;
 *   - CHOL_SOPALIN alone           : PASTIX_potrf_block;
 *   - otherwise, with HERMITIAN    : PASTIX_hetrf_block;
 *   - otherwise                    : PASTIX_sytrf_block.
 * In the last two cases, if iparm[IPARM_ESP] is set, a strided copy of the
 * block is additionally stored in the column block's U coefficient array.
 *
 * Parameters:
 *   sopalin_data - global PaStiX internal data.
 *   me           - Index of the calling thread (indexes thread_data).
 *   c            - Index of the column block to factorize.
 */
void factor_diag (Sopalin_Data_t *sopalin_data, PASTIX_INT me, PASTIX_INT c)
{

  PASTIX_INT    size,stride;
  PASTIX_FLOAT *ga = NULL;
#ifdef SOPALIN_LU
  PASTIX_FLOAT *gb = NULL;
#endif
  SolverMatrix  *datacode    = sopalin_data->datacode;
  Thread_Data_t *thread_data = sopalin_data->thread_data[me];

  /* check if diagonal column block */
  ASSERTDBG ( SYMB_FCOLNUM(c) == SYMB_FROWNUM(SYMB_BLOKNUM(c)),
              MOD_SOPALIN );

  /* Initialize the block pointers */
  ga = & (SOLV_COEFTAB(c)[ SOLV_COEFIND(SYMB_BLOKNUM(c))]);
#ifdef SOPALIN_LU
  gb = & (SOLV_UCOEFTAB(c)[SOLV_COEFIND(SYMB_BLOKNUM(c))]);
#endif
  /* size: last minus first column index of c, plus one */
  size   = SYMB_LCOLNUM (c)-SYMB_FCOLNUM(c)+1;
  stride = SOLV_STRIDE (c);

#ifdef COMPUTE
#  ifdef CHOL_SOPALIN
#    ifdef SOPALIN_LU
  PASTIX_getrf_block (ga, size, size, stride,
                      & (thread_data->nbpivot),
                      sopalin_data->critere);
  DimTrans (ga,stride,size,gb);
#    else /* SOPALIN_LU */
  /*SOPALIN_POF ("L",ga,stride,size);*/
  PASTIX_potrf_block (ga, size, stride,
                      & (thread_data->nbpivot),
                      sopalin_data->critere);

#    endif /* SOPALIN_LU */

#  else /* CHOL_SOPALIN */

  /* variant using PPF with full storage (disabled)
     gb=maxbloktab1[me];
     TALIGNF (ga,size,stride,gb);
     SOPALIN_PPF (gb,size,izero);
     TALIGNB (ga,size,stride,gb); */
#    ifdef HERMITIAN
  PASTIX_hetrf_block (ga, size, stride,
                      & (thread_data->nbpivot),
                      sopalin_data->critere,
                      thread_data->maxbloktab1);
#    else
  PASTIX_sytrf_block (ga, size, stride,
                      & (thread_data->nbpivot),
                      sopalin_data->critere,
                      thread_data->maxbloktab1);
#    endif
  /* version BLAS 3 */
  /* Copy diagonal for esp tasks */
  if (sopalin_data->sopar->iparm[IPARM_ESP])
    {
      /* With the increment raised to stride+1 the copy steps from one
       * diagonal entry of the block to the next - assuming SOPALIN_COPY
       * takes (count, source, source increment, destination, destination
       * increment) like BLAS xCOPY; TODO confirm. */
      stride++;
      SOPALIN_COPY (size, ga, stride,
                    & (SOLV_UCOEFTAB(c)[SOLV_COEFIND(SYMB_BLOKNUM(c))]), iun);
    }

#  endif /* CHOL_SOPALIN */
#endif /* COMPUTE */
}