/* * Function: updo_trsm_starpu_common * * Parameters: * buffers - Data handlers : * 1 - L column block * 2 - Right-hand-side block facing the column block. * _args - Codelet arguments: * sopalin_data - global PaStiX internal data. * cblknum - Current column block index. */ static inline void updo_trsm_starpu_common(void * buffers[], void * _args, int arch) { starpu_updo_trsm_data_t * args = (starpu_updo_trsm_data_t*)_args; Sopalin_Data_t * sopalin_data = args->sopalin_data; SolverMatrix * datacode = sopalin_data->datacode; PASTIX_FLOAT * L = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]); PASTIX_FLOAT * RHS = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]); PASTIX_INT stride = STARPU_MATRIX_GET_LD(buffers[0]); PASTIX_INT rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]); PASTIX_INT rhssze = STARPU_MATRIX_GET_LD(buffers[1]); PASTIX_INT cblknum = args->cblknum; char * transpose = &(args->transpose); char * diag = &(args->diag); PASTIX_INT colnbr = CBLK_COLNBR(cblknum); PASTIX_FLOAT fun = 1.0; ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN); ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN); switch(arch) { case ARCH_CPU: SOPALIN_TRSM("L","L",transpose,diag,colnbr,rhsnbr,fun,L,stride,RHS,rhssze); break; case ARCH_CUDA: default: errorPrint("Unknown Architecture"); assert(0); break; } }
/*
 * Function: API_CALL(wait_contrib_comp_2d)
 *
 * Wait until both TASK_BTAGPTR(i) and RTASK_COEFTAB(i) are set for task i.
 *
 * Depending on the communication-mode macros, the caller first drives the
 * communications itself (rcsd_testall_fab or RECV_ONE_BLOCK), then blocks on
 * the condition variable indexed by TASK_MASTER(i) until both pointers are
 * non-NULL.
 *
 * Parameters:
 *   sopalin_data - global PaStiX internal data.
 *   me           - index into sopalin_data->thread_data; also handed to the
 *                  communication helpers (presumably the calling thread id).
 *   i            - index of the task whose contributions are awaited.
 */
static inline void API_CALL(wait_contrib_comp_2d)(Sopalin_Data_t *sopalin_data, PASTIX_INT me, PASTIX_INT i){
  /* Not referenced by name below; presumably consumed by the TASK_* / SOLV_*
   * macros (TODO confirm). */
  SolverMatrix *datacode = sopalin_data->datacode;
#ifdef TRACE_SOPALIN
  /* NOTE(review): only declared with TRACE_SOPALIN, yet passed to
   * trace_begin_task() unconditionally - presumably that macro expands to
   * nothing without tracing; confirm. */
  Thread_Data_t *thread_data = sopalin_data->thread_data[me];
#endif
#ifdef SMP_SOPALIN
  /* NOTE(review): only declared with SMP_SOPALIN, yet used in the final
   * MUTEX_LOCK / COND_WAIT / MUTEX_UNLOCK sequence and in an ASSERTDBG -
   * presumably those macros discard their arguments in non-SMP builds;
   * confirm. */
  PASTIX_INT firsttask = TASK_MASTER(i);
#endif

  /* Wait for the local and MPI contributions */
#if (defined FORCE_CONSO)
  if (THREAD_FUNNELED_OFF)
    {
      /* Keep testing pending communications until both pointers are set. */
      while ((!(TASK_BTAGPTR(i))) || (!(RTASK_COEFTAB(i))))
        {
          API_CALL(rcsd_testall_fab)(sopalin_data, me);
        }
    }
  else
#endif
    {
      if (THREAD_COMM_OFF)
        {
          /* Receive blocks while the block target pointer is still missing
           * and taskmark[i] is positive. */
          while ((TASK_BTAGPTR(i) == NULL) && (sopalin_data->taskmark[i] > 0))
            {
              ASSERTDBG(i == firsttask, MOD_SOPALIN);
              RECV_ONE_BLOCK;
            }
        }
    }

  trace_begin_task(thread_data->tracefile, SOPALIN_CLOCK_TRACE, SOLV_PROCNUM, me, 1, STATE_WAITLOC, i);

#if (DBG_PASTIX_DYNSCHED > 0)
  /* Debug-only consistency check of the task state, followed by a bump of
   * taskmark[i], all under the task's own mutex. */
  MUTEX_LOCK(&(sopalin_data->mutex_task[i]));
  ASSERTDBG(((TASK_TASKID(i) == E2) && (sopalin_data->taskmark[i] == 0)) || ((TASK_TASKID(i) == E1) && (sopalin_data->taskmark[i] == 1)), MOD_SOPALIN);
  ASSERTDBG(TASK_BTAGPTR(i) != NULL, MOD_SOPALIN);
  ASSERTDBG(RTASK_COEFTAB(i) != NULL, MOD_SOPALIN);
  sopalin_data->taskmark[i]++;
  MUTEX_UNLOCK(&(sopalin_data->mutex_task[i]));
#endif

  /* Final wait: the mutex and condition variable used are those indexed by
   * firsttask (TASK_MASTER(i)), not by i. */
  MUTEX_LOCK(&(sopalin_data->mutex_task[firsttask]));
  while ((!(TASK_BTAGPTR(i))) || (!(RTASK_COEFTAB(i))))
    COND_WAIT(&(sopalin_data->cond_task[firsttask]), &(sopalin_data->mutex_task[firsttask]));
  MUTEX_UNLOCK(&(sopalin_data->mutex_task[firsttask]));
}
/* * Function: updo_diag_starpu_common * * Divide the right-hand-side(s) by the diagonal * * Parameters: * buffers - Data handlers : * 0 - L column block * 1 - Right-hand-side block facing the column block. * _args - Codelet arguments: * sopalin_data - global PaStiX internal data. * cblknum - Current column block index. * arch - Type of architecture : ARCH_CPU | ARCH_CUDA */ static inline void updo_diag_starpu_common(void * buffers[], void * _args, int arch) { starpu_updo_diag_data_t * args = (starpu_updo_diag_data_t*)_args; Sopalin_Data_t * sopalin_data = args->sopalin_data; SolverMatrix * datacode = sopalin_data->datacode; PASTIX_FLOAT * L = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]); PASTIX_FLOAT * RHS = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]); PASTIX_INT stride = STARPU_MATRIX_GET_LD(buffers[0]); PASTIX_INT rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]); PASTIX_INT rhssze = STARPU_MATRIX_GET_LD(buffers[1]); PASTIX_INT cblknum = args->cblknum; PASTIX_INT colnbr = CBLK_COLNBR(cblknum); ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN); ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN); switch(arch) { case ARCH_CPU: { PASTIX_INT i, j; PASTIX_FLOAT * myRHS = RHS; for (j = 0; j < rhsnbr; j++) { for (i = 0; i < colnbr; i++) { myRHS[i] /= L[i*(stride+1)]; } myRHS += rhssze; } break; } case ARCH_CUDA: default: errorPrint("Unknown Architecture"); assert(0); break; } }
/* * Function: updo_up_gemm_starpu_common * * Parameters: * buffers - Data handlers : * 1 - L column block * 2 - Right-hand-side block facing the column block. * _args - Codelet arguments: * sopalin_data - global PaStiX internal data. * cblknum - Current column block index. */ static inline void updo_up_gemm_starpu_common(void * buffers[], void * _args, int arch) { starpu_updo_gemm_data_t * args = (starpu_updo_gemm_data_t*)_args; Sopalin_Data_t * sopalin_data = args->sopalin_data; SolverMatrix * datacode = sopalin_data->datacode; PASTIX_FLOAT * L = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]); PASTIX_FLOAT * RHS = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]); PASTIX_FLOAT * RHS2 = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[2]); PASTIX_INT stride = STARPU_MATRIX_GET_LD(buffers[0]); PASTIX_INT rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]); PASTIX_INT rhssze = STARPU_MATRIX_GET_LD(buffers[1]); PASTIX_INT cblknum = args->cblknum; PASTIX_INT bloknum = args->bloknum; char * transpose = &(args->transpose); PASTIX_INT fcblknum = SYMB_CBLKNUM(bloknum); PASTIX_INT colnbr = CBLK_COLNBR(cblknum); PASTIX_INT rownbr = BLOK_ROWNBR(bloknum); PASTIX_FLOAT fun = 1.0; PASTIX_FLOAT * ga = L + SOLV_COEFIND(bloknum); PASTIX_FLOAT * gc = RHS2 + SYMB_FROWNUM(bloknum) - SYMB_FCOLNUM(fcblknum); ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN); ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN); switch(arch) { case ARCH_CPU: SOPALIN_GEMM(transpose,"N",colnbr,rhsnbr,rownbr,-fun,ga,stride, gc,rhssze,fun,RHS, UPDOWN_SM2XSZE); break; case ARCH_CUDA: default: errorPrint("Unknown Architecture"); assert(0); break; } }
/*
 * Function: API_CALL(wait_contrib_comp_1d)
 *
 * Wait until the contribution counter TASK_CTRBCNT(i) of task i drops to
 * zero.
 *
 * Depending on the communication-mode macros, the caller first drives the
 * communications itself (rcsd_testall_fab or RECV_ONE_FANIN), then blocks on
 * the task's condition variable while TASK_CTRBCNT(i) is non-zero.
 *
 * Parameters:
 *   sopalin_data - global PaStiX internal data.
 *   me           - index into sopalin_data->thread_data; also handed to the
 *                  communication helpers (presumably the calling thread id).
 *   i            - index of the task whose contributions are awaited.
 */
static inline void API_CALL(wait_contrib_comp_1d)(Sopalin_Data_t *sopalin_data, PASTIX_INT me, PASTIX_INT i){
  /* Not referenced by name below; presumably consumed by the TASK_* / SOLV_*
   * macros (TODO confirm). */
  SolverMatrix *datacode = sopalin_data->datacode;
#ifdef TRACE_SOPALIN
  /* NOTE(review): only declared with TRACE_SOPALIN, yet passed to
   * trace_begin_task() unconditionally - presumably that macro expands to
   * nothing without tracing; confirm. */
  Thread_Data_t *thread_data = sopalin_data->thread_data[me];
#endif

#if (defined FORCE_CONSO)
  if (THREAD_FUNNELED_OFF)
    {
      /* Wait in "multiple" mode with force_conso */
      while(TASK_CTRBCNT(i))
        {
          API_CALL(rcsd_testall_fab)(sopalin_data, me);
        }
    }
  else
#endif
    {
      if (THREAD_COMM_OFF)
        {
          /* Wait in "multiple" mode without force_conso */
          while(TASK_FTGTCNT(i))
            {
              RECV_ONE_FANIN;
            }
        }
    }

  trace_begin_task(thread_data->tracefile, SOPALIN_CLOCK_TRACE, SOLV_PROCNUM, me, 1, STATE_WAITLOC, i);

  /* The counter is re-tested under the task's mutex before each wait. */
  MUTEX_LOCK(&(sopalin_data->mutex_task[i]));
#if (DBG_PASTIX_DYNSCHED > 0)
  /* Debug-only: the task must not have been marked yet; mark it now. */
  ASSERTDBG(sopalin_data->taskmark[i] == 0, MOD_SOPALIN);
  sopalin_data->taskmark[i]++;
#endif
  while (TASK_CTRBCNT(i))
    {
      COND_WAIT(&(sopalin_data->cond_task[i]), &(sopalin_data->mutex_task[i]));
    }
  MUTEX_UNLOCK(&(sopalin_data->mutex_task[i]));
}
BOOLEAN
PEFile::RtlGetExports(
)
/*++

Routine Description:

    Walks the export directory of the image held in m_Image and appends one
    EXPORT_INFO entry per exported name to m_Exports. For every entry the
    routine records the function RVA, the name index, the ordinal-table
    value, whether the RVA points outside the image (IsTablePatched) and
    whether IsPointerHooked() flags the target (IsHooked).

    On success m_NumberOfExportedFunctions is set to the directory's
    NumberOfNames and m_NumberOfHookedAPIs to the number of entries that
    are table-patched or hooked.

Arguments:

    None.

Return Value:

    BOOLEAN - TRUE when the export directory was found and walked; FALSE
              when the image is not initialized, has no export directory,
              or the directory fails the sanity checks.

--*/
{
    // Hard cap on the number of names walked, so that a corrupted
    // NumberOfNames cannot drive an unbounded loop.
    const UINT MaxExportNames = 5000;

    ASSERTDBG(m_Image.Initialized);
    if (!m_Image.Initialized) return FALSE;

    const ULONG DirRva = m_Image.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
    const ULONG DirSize = m_Image.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].Size;

    if (!DirRva || !DirSize) return FALSE;
    if (m_ImageSize && (DirRva > m_ImageSize)) return FALSE;

    // Reject a directory whose end wraps around the 32-bit RVA space; every
    // range check below relies on DirEnd being a real upper bound.
    const ULONG DirEnd = DirRva + DirSize;
    if (DirEnd < DirRva) return FALSE;

    PUCHAR Image = ((PUCHAR)m_Image.Image);
    PIMAGE_EXPORT_DIRECTORY ExportDir = (PIMAGE_EXPORT_DIRECTORY)(Image + DirRva);

    if ((ExportDir->AddressOfNames >= DirEnd) ||
        (ExportDir->AddressOfNameOrdinals >= DirEnd) ||
        (ExportDir->AddressOfFunctions >= DirEnd))
    {
        return FALSE;
    }

    PULONG AddressOfNames = (PULONG)(Image + (ULONG)ExportDir->AddressOfNames);
    PUSHORT AddressOfNameOrdinals = (PUSHORT)(Image + (ULONG)ExportDir->AddressOfNameOrdinals);
    PULONG AddressOfFunctions = (PULONG)(Image + (ULONG)ExportDir->AddressOfFunctions);

#if VERBOSE_MODE
    g_Ext->Dml("(%s) ExportDir->NumberOfName: %d, ExportDir->NumberOfFunctions: %d\n",
               m_PdbInfo.PdbName, ExportDir->NumberOfNames, ExportDir->NumberOfFunctions);
#endif

    m_NumberOfExportedFunctions = ExportDir->NumberOfNames;

    ULONG NumberOfHookedAPIs = 0;

    for (UINT i = 0; (i < ExportDir->NumberOfNames) && (i < MaxExportNames); i += 1)
    {
        EXPORT_INFO ExportInfo = { 0 };

        // The name-ordinal table holds indexes into the function address
        // table, so the bound is NumberOfFunctions (not NumberOfNames).
        const USHORT FunctionIndex = AddressOfNameOrdinals[i];
        if (FunctionIndex >= ExportDir->NumberOfFunctions) continue;

        ExportInfo.Address = AddressOfFunctions[FunctionIndex];
        ExportInfo.Index = i;
        ExportInfo.Ordinal = FunctionIndex;

        ExportInfo.IsTablePatched = (ExportInfo.Address >= m_ImageSize) ? TRUE : FALSE;
        ExportInfo.IsHooked = IsPointerHooked(m_ImageBase + ExportInfo.Address);
        if (ExportInfo.IsTablePatched || ExportInfo.IsHooked) NumberOfHookedAPIs++;

        // Validate the name RVA BEFORE touching the string, and never scan
        // past the end of the export directory.
        const ULONG NameRva = AddressOfNames[i];
        ULONG Len = 0;

        if (NameRva < DirEnd)
        {
            ULONG MaxLen = (ULONG)(sizeof(ExportInfo.Name) - 1);
            const ULONG Remaining = DirEnd - NameRva;

            if (Remaining < MaxLen) MaxLen = Remaining;

            Len = (ULONG)strnlen_s((LPSTR)(Image + NameRva), MaxLen);
        }

        if (Len)
        {
            // ExportInfo is zero-initialized and Len < sizeof(Name), so the
            // copied name stays NUL-terminated.
            memcpy_s(ExportInfo.Name, sizeof(ExportInfo.Name), (LPSTR)(Image + NameRva), Len);
        }
        else
        {
            strcpy_s(ExportInfo.Name, sizeof(ExportInfo.Name), "*unreadable*");
        }

        m_Exports.push_back(ExportInfo);
    }

    m_NumberOfHookedAPIs = NumberOfHookedAPIs;

    return TRUE;
}
/*
 * Function: factor_diag
 *
 * Factorization of the diagonal block of column block c, performed in
 * place in the column block's coefficient array.  The kernel is chosen at
 * compile time:
 *   CHOL_SOPALIN + SOPALIN_LU - PASTIX_getrf_block, then DimTrans into the
 *                               U coefficient array.
 *   CHOL_SOPALIN only         - PASTIX_potrf_block.
 *   otherwise                 - PASTIX_hetrf_block (HERMITIAN) or
 *                               PASTIX_sytrf_block; when iparm[IPARM_ESP]
 *                               is set the diagonal is also copied into
 *                               the U coefficient array.
 * Without COMPUTE no kernel is called.
 *
 * Parameters:
 *   sopalin_data - global PaStiX internal data.
 *   me           - index into sopalin_data->thread_data (presumably the
 *                  calling thread id).
 *   c            - index of the column block to factorize.
 */
void factor_diag (Sopalin_Data_t *sopalin_data, PASTIX_INT me, PASTIX_INT c)
{
  /* Not always referenced by name below; presumably consumed by the
   * SYMB_* / SOLV_* macros, so the identifier must stay `datacode`
   * (TODO confirm). */
  SolverMatrix  *datacode    = sopalin_data->datacode;
  Thread_Data_t *thread_data = sopalin_data->thread_data[me];
  PASTIX_INT     diagblok;
  PASTIX_INT     coefind;
  PASTIX_INT     dimb;
  PASTIX_INT     ld;
  PASTIX_FLOAT  *ga = NULL;
#ifdef SOPALIN_LU
  PASTIX_FLOAT  *gb = NULL;
#endif

  /* The first block of the column block must be the diagonal block. */
  ASSERTDBG(SYMB_FCOLNUM(c) == SYMB_FROWNUM(SYMB_BLOKNUM(c)), MOD_SOPALIN);

  /* Set up the block pointers. */
  diagblok = SYMB_BLOKNUM(c);
  coefind  = SOLV_COEFIND(diagblok);
  ga       = (SOLV_COEFTAB(c)) + coefind;
#ifdef SOPALIN_LU
  gb       = (SOLV_UCOEFTAB(c)) + coefind;
#endif

  dimb = SYMB_LCOLNUM(c) - SYMB_FCOLNUM(c) + 1;
  ld   = SOLV_STRIDE(c);

#ifdef COMPUTE
# if defined(CHOL_SOPALIN) && defined(SOPALIN_LU)
  /* LU: factorize the diagonal block, then transpose it into U. */
  PASTIX_getrf_block(ga, dimb, dimb, ld,
                     &(thread_data->nbpivot),
                     sopalin_data->critere);
  DimTrans(ga, ld, dimb, gb);
# elif defined(CHOL_SOPALIN)
  /* Cholesky (a former SOPALIN_POF call was replaced by the block kernel). */
  PASTIX_potrf_block(ga, dimb, ld,
                     &(thread_data->nbpivot),
                     sopalin_data->critere);
# else
  /* LDLt, BLAS 3 version (a former PPF full-storage variant based on
   * TALIGNF / SOPALIN_PPF / TALIGNB is no longer used). */
#  ifdef HERMITIAN
  PASTIX_hetrf_block(ga, dimb, ld,
                     &(thread_data->nbpivot),
                     sopalin_data->critere,
                     thread_data->maxbloktab1);
#  else
  PASTIX_sytrf_block(ga, dimb, ld,
                     &(thread_data->nbpivot),
                     sopalin_data->critere,
                     thread_data->maxbloktab1);
#  endif

  /* Copy the diagonal for the ESP tasks: stepping through ga by
   * (stride + 1) visits the diagonal entries only. */
  if (sopalin_data->sopar->iparm[IPARM_ESP])
    {
      ld += 1;
      SOPALIN_COPY(dimb, ga, ld, (SOLV_UCOEFTAB(c)) + coefind, iun);
    }
# endif /* factorization kind */
#endif /* COMPUTE */
}