/*
 * Report the current write position of a string stream.
 *
 * The stream is set up through ss_init() first when its `initialized`
 * flag is still zero.  Returns ss->empty_pos.
 */
int ss_length (str_stream* ss)
{
    if (!ss->initialized) {
        ss_init (ss);
    }

    return ss->empty_pos;
}
/*
 * Window procedure of the screen saver.
 *
 * WM_CREATE   runs ss_init(); a zero result aborts creation by returning -1,
 *             otherwise timer 999 is armed with a zero delay.
 * WM_DESTROY  stops timer 999 and calls ss_term().
 * WM_TIMER    stops timer 999, draws one frame with hack_draw() and re-arms
 *             the timer with the value hack_draw() returned divided by 1000.
 * Anything else is forwarded to DefScreenSaverProc().
 *
 * Handled messages return 0.
 */
LRESULT WINAPI ScreenSaverProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
    if (uMsg == WM_CREATE) {
        if (ss_init(hWnd) == 0) {
            return -1;
        }
        SetTimer(hWnd, 999, 0, NULL);
        return 0;
    }

    if (uMsg == WM_DESTROY) {
        KillTimer(hWnd, 999);
        ss_term();
        return 0;
    }

    if (uMsg == WM_TIMER) {
        unsigned long draw_result;

        KillTimer(hWnd, 999);
        draw_result = hack_draw(ss.dpy, ss.window, ss.closure);
        SetTimer(hWnd, 999, draw_result / 1000, NULL);
        return 0;
    }

    return DefScreenSaverProc(hWnd, uMsg, wParam, lParam);
}
/*
 * Append printf-style formatted text to a string stream.
 *
 * ss   Stream to append to.  It is set up through ss_init() first when its
 *      `initialized` flag is still zero.
 * fmt  printf-style format string, followed by the values it consumes.
 *
 * The text is written at ss->empty_pos.  When it does not fit, the buffer is
 * doubled (ss->max_length) with realloc() and the formatting is retried.
 * On success ss->empty_pos is advanced by the number of characters written
 * and the buffer is NUL-terminated when there is room for the terminator.
 *
 * fatal_error() is called when realloc() fails; the previous buffer is kept
 * in that case.  When vsnprintf() reports an output error (negative result,
 * non-WIN32 only) the stream is left unchanged.
 */
void ss_printf (str_stream* ss, char* fmt, ...)
{
    int free_space;
    int n;
    va_list args;

    if (0 == ss->initialized)
        ss_init (ss);

    for (;;) {
        char *grown;

        free_space = ss->max_length - ss->empty_pos;

        /* A va_list is indeterminate once a v*printf function has consumed
           it, so it is restarted for every formatting attempt. */
        va_start (args, fmt);
#ifdef WIN32
        /* Returns -1 when buffer is too small */
        n = _vsnprintf (ss->buf + ss->empty_pos, free_space, fmt, args);
        va_end (args);
        if (n >= 0)
            break;
#else
        /* Returns necessary space when buffer is too small */
        n = vsnprintf (ss->buf + ss->empty_pos, free_space, fmt, args);
        va_end (args);
        if (n < 0)
            return;             /* output error: do not move empty_pos */
        if (n < free_space)
            break;
#endif

        /* Not enough room to store output: double buffer size and try again.
           The result goes through a temporary so the old buffer is not lost
           when realloc() fails. */
        grown = (char *) realloc (ss->buf, (size_t)(2 * ss->max_length));
        if (NULL == grown) {
            fatal_error ("realloc failed\n");
            return;
        }
        ss->buf = grown;
        ss->max_length = 2 * ss->max_length;
    }

    ss->empty_pos += n;

    /* Null terminate string (for debugging) if there is enough room */
    if (ss->empty_pos < ss->max_length)
        ss->buf[ss->empty_pos] = '\0';
}
/*
 * Bring up the joypad driver.
 *
 * Registers the reset callback (and, under HW_RVL, the power callback),
 * marks every pad slot as WPAD_EXP_NOCONTROLLER, initializes PAD and, under
 * HW_RVL, WPAD plus the optional sixaxis support, then polls once.
 *
 * data  Unused.
 * Always returns true.
 */
static bool gx_joypad_init(void *data)
{
   int pad;

   (void)data;

   SYS_SetResetCallback(reset_cb);
#ifdef HW_RVL
   SYS_SetPowerCallback(power_callback);
#endif

   pad = 0;
   while (pad < MAX_PADS)
   {
      pad_type[pad] = WPAD_EXP_NOCONTROLLER;
      pad++;
   }

   PAD_Init();
#ifdef HW_RVL
   WPADInit();
#ifdef HAVE_LIBSICKSAXIS
   ss_init(sixaxis, USB_SLOTS);
#endif
#endif

   gx_joypad_poll();

   return true;
}
// Simple initialization function. static void init(void) { // Enable interrupts IRQ_ENABLE; // Initialize hardware timers timer_init(); // Initialize serial comms on UART0, // which is the hardware serial on arduino ser_init(&ser, SER_UART0); ser_setbaudrate(&ser, 9600); // For some reason BertOS sets the serial // to 7 bit characters by default. We set // it to 8 instead. UCSR0C = _BV(UCSZ01) | _BV(UCSZ00); // Create a modem context afsk_init(&afsk, ADC_CH); // ... and a protocol context with the modem ax25_init(&ax25, &afsk.fd, message_callback); // Init SimpleSerial ss_init(&ax25); // That's all! }
/*
 * Window procedure of the screen saver (fixed-period variant).
 *
 * WM_CREATE   runs ss_init(); a zero result aborts creation by returning -1,
 *             otherwise timer 999 is armed with hack_delay / 1000.
 * WM_DESTROY  stops timer 999 and calls ss_term().
 * WM_TIMER    draws one frame with hack_draw().
 * Anything else is forwarded to DefScreenSaverProc().
 *
 * Handled messages return 0.
 */
LRESULT WINAPI ScreenSaverProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
    if (uMsg == WM_CREATE) {
        if (ss_init(hWnd) == 0) {
            return -1;
        }
        SetTimer(hWnd, 999, hack_delay / 1000, NULL);
        return 0;
    }

    if (uMsg == WM_DESTROY) {
        KillTimer(hWnd, 999);
        ss_term();
        return 0;
    }

    if (uMsg == WM_TIMER) {
        hack_draw(&ss.modeinfo);
        return 0;
    }

    return DefScreenSaverProc(hWnd, uMsg, wParam, lParam);
}
/*
 * Empty a string stream by moving its write position back to 0.
 *
 * The stream is set up through ss_init() first when its `initialized`
 * flag is still zero.  The buffer itself is not touched.
 */
void ss_clear (str_stream* ss)
{
    if (!ss->initialized) {
        ss_init (ss);
    }

    ss->empty_pos = 0;
}
/*
 * Program entry: initialize from the command line, run when initialization
 * left exit_status at 0, and return whatever ss_exit() reports.
 */
int main( int argc, const char** argv )
{
    ss_info info;

    ss_init( &info, argc, argv );

    if ( info.exit_status != 0 ) {
        return ss_exit( &info );
    }

    ss_run( &info );
    return ss_exit( &info );
}
/*
 * Drop the last n characters of a string stream.
 *
 * The write position is moved back by n and clamped at 0.  The stream is
 * set up through ss_init() first when its `initialized` flag is still zero.
 */
void ss_erase (str_stream* ss, int n)
{
    int new_pos;

    if (!ss->initialized) {
        ss_init (ss);
    }

    new_pos = ss->empty_pos - n;
    ss->empty_pos = (new_pos < 0) ? 0 : new_pos;
}
int main(int argc, char *argv[]) { double t1, t2; Node root; StealStack *ss; /* initialize stealstacks and comm. layer */ ss = ss_init(&argc, &argv); /* determine benchmark parameters */ uts_parseParams(argc, argv); /* Initialize trace collection structures */ ss_initStats(ss); /* show parameter settings */ if (ss_get_thread_num() == 0) { uts_printParams(); } fflush(NULL); // Workers will return 1 from ss_start(), all others (managers) // will return 0 here once the computation ends if (ss_start(sizeof(Node), chunkSize)) { /* initialize root node and push on thread 0 stack */ if (ss_get_thread_num() == 0) { uts_initRoot(&root, type); #ifdef TRACE ss_markSteal(ss, 0); // first session is own "parent session" #endif ss_put_work(ss, &root); } /* time parallel search */ t1 = uts_wctime(); parTreeSearch(ss); t2 = uts_wctime(); ss->walltime = t2 - t1; #ifdef TRACE ss->startTime = t1; ss->sessionRecords[SS_IDLE][ss->entries[SS_IDLE] - 1].endTime = t2; #endif } ss_stop(); /* display results */ showStats(); ss_finalize(); return 0; }
/*
 * Initialize USB and the sixaxis layer, then set up the `first` device.
 *
 * Returns false when ss_init() reports a negative result, true otherwise.
 */
bool DS3_Init()
{
    USB_Initialize();

    if (ss_init() >= 0) {
        ss_initialize(&first);
        return true;
    }

    return false;
}
/*
 * Return the stream contents as a NUL-terminated C string.
 *
 * The stream is set up through ss_init() first when its `initialized` flag
 * is still zero.  When the buffer is exactly full it is grown by one byte so
 * the terminator fits; fatal_error() is called if that realloc() fails.
 * The terminator is stored at ss->empty_pos, after which ss->empty_pos is
 * advanced past it.
 *
 * Returns ss->buf (still owned by the stream).
 */
char* ss_to_string (str_stream* ss)
{
    if (!ss->initialized) {
        ss_init (ss);
    }

    if (ss->max_length == ss->empty_pos) {
        /* No byte left for the terminator: enlarge the buffer by one */
        ss->max_length += 1;
        ss->buf = (char *) realloc (ss->buf, (size_t)ss->max_length);
        if (ss->buf == NULL) {
            fatal_error ("realloc failed\n");
        }
    }

    /* Terminate, then step over the terminator */
    ss->buf[ss->empty_pos] = '\0';
    ss->empty_pos++;

    return ss->buf;
}
/*
 * One-time start-up: enable interrupts, bring up the modem, the AX.25 layer
 * and the serial port, route stdout/stdin to uart0, then initialize the
 * serial protocol selected at build time by SERIAL_PROTOCOL.
 */
void init(void)
{
    sei();

    AFSK_init(&modem);
    ax25_init(&AX25, &modem.fd, ax25_callback);

    serial_init(&serial);
    stdout = &serial.uart0;
    stdin  = &serial.uart0;

#if SERIAL_PROTOCOL == PROTOCOL_KISS
    kiss_init(&AX25, &modem, &serial);
#endif

#if SERIAL_PROTOCOL == PROTOCOL_SIMPLE_SERIAL
    ss_init(&AX25);
#endif
}
/*
 * Bring up the joypad driver.
 *
 * Registers the reset callback (and, under HW_RVL, the power callback),
 * initializes PAD and, under HW_RVL, WPAD.  With HAVE_LIBSICKSAXIS it also
 * initializes USB and the sixaxis layer and sets up one device per pad slot.
 * Finishes with a single poll.
 *
 * Always returns true.
 */
static bool gx_joypad_init(void)
{
   SYS_SetResetCallback(reset_cb);
#ifdef HW_RVL
   SYS_SetPowerCallback(power_callback);
#endif

   PAD_Init();
#ifdef HW_RVL
   WPADInit();
#endif

#ifdef HAVE_LIBSICKSAXIS
   USB_Initialize();
   ss_init();
   for (int slot = 0; slot < MAX_PADS; slot++)
   {
      ss_initialize(&dev[slot]);
   }
#endif

   gx_joypad_poll();

   return true;
}
/***************************************************************************//**
 *  Parallel tile matrix norm - static scheduling
 *
 *  Unpacks (norm, A, work, result, sequence, request) from the context.
 *  *result is first set to 0.0 and thread 0 zeroes work[0..PLASMA_SIZE-1].
 *
 *  Each thread then computes a partial value into work[PLASMA_RANK]:
 *    - PlasmaMaxNorm: largest CORE_zlange(PlasmaMaxNorm) value over the tiles
 *      this thread visits (tiles are taken PLASMA_SIZE apart).
 *    - PlasmaOneNorm: for every PLASMA_SIZE-th tile column, column sums are
 *      accumulated with CORE_dzasum(PlasmaColumnwise) over all tile rows and
 *      the largest sum is kept.
 *    - PlasmaInfNorm: same with tile rows and CORE_dzasum(PlasmaRowwise).
 *    - PlasmaFrobeniusNorm / anything else: nothing is computed, so *result
 *      keeps the 0.0 stored at entry.
 *
 *  X1/X2 and Y1/Y2 clip the first and last tile in each direction using
 *  A.i, A.j, A.m, A.n modulo the tile size.
 *
 *  Unless the norm is PlasmaFrobeniusNorm, the partial values are combined
 *  with max() in steps that double each round (step = 1, 2, 4, ...), ordered
 *  by ss_cond_wait()/ss_cond_set() on entry (rank, 0); thread 0 finally
 *  stores work[0] into *result.
 *
 *  NOTE(review): the closing brace of the function body is not present in
 *  this excerpt - confirm against the complete file.
 **/
void plasma_pzlange(plasma_context_t *plasma) { PLASMA_enum norm; PLASMA_desc A; double *work; double *result; PLASMA_sequence *sequence; PLASMA_request *request; int m, n; int next_m; int next_n; int ldam; int step, lrank; int X, X1, X2, Y, Y1, Y2; double* lwork; double normtmp, normtmp2; plasma_unpack_args_6(norm, A, work, result, sequence, request); *result = 0.0; if (PLASMA_RANK == 0) memset(work, 0, PLASMA_SIZE*sizeof(double)); ss_init(PLASMA_SIZE, 1, 0); switch (norm) { /* * PlasmaMaxNorm */ case PlasmaMaxNorm: n = 0; m = PLASMA_RANK; while (m >= A.mt && n < A.nt) { n++; m = m-A.mt; } while (n < A.nt) { next_m = m; next_n = n; next_m += PLASMA_SIZE; while (next_m >= A.mt && next_n < A.nt) { next_n++; next_m = next_m-A.mt; } X1 = m == 0 ? A.i %A.mb : 0; X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; X = X2 - X1; Y1 = n == 0 ? A.j %A.nb : 0; Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; Y = Y2 - Y1; ldam = BLKLDD(A, m); CORE_zlange(PlasmaMaxNorm, X, Y, A(m, n, X1, Y1, ldam), ldam, NULL, &normtmp); if (normtmp > work[PLASMA_RANK]) work[PLASMA_RANK] = normtmp; m = next_m; n = next_n; } ss_cond_set(PLASMA_RANK, 0, 1); break; /* * PlasmaOneNorm */ case PlasmaOneNorm: n = PLASMA_RANK; normtmp2 = 0.0; lwork = (double*)plasma_private_alloc(plasma, A.nb, PlasmaRealDouble); while (n < A.nt) { Y1 = n == 0 ? A.j %A.nb : 0; Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; Y = Y2 - Y1; memset(lwork, 0, A.nb*sizeof(double)); for (m = 0; m < A.mt; m++) { X1 = m == 0 ? A.i %A.mb : 0; X2 = m == A.mt-1 ? 
(A.i+A.m-1)%A.mb+1 : A.mb; X = X2 - X1; ldam = BLKLDD(A, m); CORE_dzasum( PlasmaColumnwise, PlasmaUpperLower, X, Y, A(m, n, X1, Y1, ldam), ldam, lwork); } CORE_dlange(PlasmaMaxNorm, Y, 1, lwork, 1, NULL, &normtmp); if (normtmp > normtmp2) normtmp2 = normtmp; n += PLASMA_SIZE; } work[PLASMA_RANK] = normtmp2; ss_cond_set(PLASMA_RANK, 0, 1); plasma_private_free(plasma, lwork); break; /* * PlasmaInfNorm */ case PlasmaInfNorm: m = PLASMA_RANK; normtmp2 = 0.0; lwork = (double*)plasma_private_alloc(plasma, A.mb, PlasmaRealDouble); while (m < A.mt) { X1 = m == 0 ? A.i %A.mb : 0; X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; X = X2 - X1; ldam = BLKLDD(A, m); memset(lwork, 0, A.mb*sizeof(double)); for (n = 0; n < A.nt; n++) { Y1 = n == 0 ? A.j %A.nb : 0; Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; Y = Y2 - Y1; CORE_dzasum( PlasmaRowwise, PlasmaUpperLower, X, Y, A(m, n, X1, Y1, ldam), ldam, lwork); } CORE_dlange(PlasmaMaxNorm, X, 1, lwork, 1, NULL, &normtmp); if (normtmp > normtmp2) normtmp2 = normtmp; m += PLASMA_SIZE; } work[PLASMA_RANK] = normtmp2; ss_cond_set(PLASMA_RANK, 0, 1); plasma_private_free(plasma, lwork); break; /* * PlasmaFrobeniusNorm - not implemented */ case PlasmaFrobeniusNorm: default:; } if (norm != PlasmaFrobeniusNorm) { step = 1; lrank = PLASMA_RANK; while ( (lrank%2 == 0) && (PLASMA_RANK+step < PLASMA_SIZE) ) { ss_cond_wait(PLASMA_RANK+step, 0, step); work[PLASMA_RANK] = max(work[PLASMA_RANK], work[PLASMA_RANK+step]); lrank = lrank >> 1; step = step << 1; ss_cond_set(PLASMA_RANK, 0, step); } if (PLASMA_RANK > 0) { while( lrank != 0 ) { if (lrank%2 == 1) { ss_cond_set(PLASMA_RANK, 0, step); lrank = 0; } else { lrank = lrank >> 1; step = step << 1; ss_cond_set(PLASMA_RANK, 0, step); } } } if (PLASMA_RANK == 0) *result = work[0]; }
int main() { char *in; time_t t; error_t status; int i; srand((unsigned)time(&t)); // tele_command_t stored; // stored.data[0].t = OP; // stored.data[0].v = 2; // stored.data[1].t = NUMBER; // stored.data[1].v = 8; // stored.data[2].t = NUMBER; // stored.data[2].v = 10; // stored.separator = -1; // stored.l = 3; // printf("\nstored process: "); // process(&stored); in = malloc(256); printf("teletype. (blank line quits)\n\n"); scene_state_t ss; ss_init(&ss); do { printf("> "); fgets(in, 256, stdin); i = 0; while (in[i]) { in[i] = toupper(in[i]); i++; } tele_command_t temp; exec_state_t es; es_init(&es); char error_msg[TELE_ERROR_MSG_LENGTH]; status = parse(in, &temp, error_msg); if (status == E_OK) { status = validate(&temp, error_msg); printf("validate: %s", tele_error(status)); if (error_msg[0]) printf(": %s", error_msg); printf("\n"); if (status == E_OK) { process_result_t output = process_command(&ss, &es, &temp); if (output.has_value) { printf(">>> %i\n", output.value); } } } else { printf("ERROR: %s", tele_error(status)); if (error_msg[0]) printf(": %s", error_msg); printf("\n"); } // tele_tick(100); printf("\n"); } while (in[0] != 10); free(in); printf("(teletype exit.)\n"); }
/***************************************************************************//**
 *  Parallel tile LU factorization - static scheduling
 *
 *  Unpacks (A, L, IPIV, sequence, request) from the context and returns at
 *  once when sequence->status is not PLASMA_SUCCESS.  A private workspace of
 *  ib*L.nb elements is allocated for the kernels and released at the end.
 *
 *  Each thread starts at tile column n = PLASMA_RANK and advances by
 *  PLASMA_SIZE tile columns, walking tile rows m = k..A.mt-1 for step k.
 *  The loop also stops as soon as ss_aborted() reports true.
 *
 *  Kernel selection per (k, m, n):
 *    n == k, m == k : CORE_zgetrf_incpiv on A(k,k)
 *    n == k, m != k : CORE_ztstrf on A(k,k) / A(m,k)
 *    n != k, m == k : CORE_zgessm on A(k,n)  (no ss_cond_set afterwards)
 *    n != k, m != k : CORE_zssssm on A(k,n) / A(m,n)
 *
 *  ss_cond_wait()/ss_cond_set() on tile coordinates order the kernels.
 *  When a panel kernel returns a nonzero info on the last tile row
 *  (m == A.mt-1), the request is failed with info + A.nb*k and ss_abort()
 *  is called.
 **/
void plasma_pzgetrf_incpiv(plasma_context_t *plasma) { PLASMA_desc A; PLASMA_desc L; int *IPIV; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldam; int info; int tempkn, tempkm, tempmm, tempnn; int ib = PLASMA_IB; PLASMA_Complex64_t *work; plasma_unpack_args_5(A, L, IPIV, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*L.nb, L.dtyp); ss_init(A.mt, A.nt, -1); k = 0; n = PLASMA_RANK; while (n >= A.nt) { k++; n = n-A.nt+k; } m = k; while (k < min(A.mt, A.nt) && n < A.nt && !ss_aborted()) { next_n = n; next_m = m; next_k = k; next_m++; if (next_m == A.mt) { next_n += PLASMA_SIZE; while (next_n >= A.nt && next_k < min(A.mt, A.nt)) { next_k++; next_n = next_n-A.nt+next_k; } next_m = next_k; } tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempnn = n == A.nt-1 ? 
A.n-n*A.nb : A.nb; ldak = BLKLDD(A, k); ldam = BLKLDD(A, m); if (n == k) { if (m == k) { ss_cond_wait(k, k, k-1); CORE_zgetrf_incpiv( tempkm, tempkn, ib, A(k, k), ldak, IPIV(k, k), &info); if (info != 0 && m == A.mt-1) { plasma_request_fail(sequence, request, info + A.nb*k); ss_abort(); } ss_cond_set(k, k, k); } else { ss_cond_wait(m, k, k-1); CORE_ztstrf( tempmm, tempkn, ib, A.nb, A(k, k), ldak, A(m, k), ldam, L(m, k), L.mb, IPIV(m, k), work, L.nb, &info); if (info != 0 && m == A.mt-1) { plasma_request_fail(sequence, request, info + A.nb*k); ss_abort(); } ss_cond_set(m, k, k); } } else { if (m == k) { ss_cond_wait(k, k, k); ss_cond_wait(k, n, k-1); CORE_zgessm( tempkm, tempnn, tempkm, ib, IPIV(k, k), A(k, k), ldak, A(k, n), ldak); } else { ss_cond_wait(m, k, k); ss_cond_wait(m, n, k-1); CORE_zssssm( A.nb, tempnn, tempmm, tempnn, A.nb, ib, A(k, n), ldak, A(m, n), ldam, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); ss_cond_set(m, n, k); } } n = next_n; m = next_m; k = next_k; } plasma_private_free(plasma, work); ss_finalize(); }
/***************************************************************************//**
 *  Parallel application of Q using tile V - LQ factorization - static scheduling
 *
 *  Unpacks (side, trans, A, B, T, sequence, request) from the context and
 *  returns at once when sequence->status is not PLASMA_SUCCESS.
 *
 *  Only side == PlasmaLeft with trans == PlasmaConjTrans is handled; any
 *  other combination fails the request with PLASMA_ERR_NOT_SUPPORTED and
 *  returns before any workspace is allocated.
 *
 *  minM/minMT are taken from the smaller of A.m and A.n.  Steps run backwards
 *  from k = minMT-1 down to 0; each thread starts at tile column
 *  n = PLASMA_RANK of B and advances by PLASMA_SIZE tile columns, walking
 *  tile rows m from B.mt-1 down to k.
 *
 *  Kernel selection per (k, m, n):
 *    m == k : CORE_zunmlq on B(k,n)
 *    m != k : CORE_ztsmlq on B(k,n) / B(m,n), after waiting for (m, n, k+1)
 *
 *  Both branches publish progress with ss_cond_set(m, n, k).  The private
 *  workspace of ib*T.nb elements is released before ss_finalize().
 **/
void plasma_pzunmlq(plasma_context_t *plasma) { PLASMA_enum side; PLASMA_enum trans; PLASMA_desc A; PLASMA_desc B; PLASMA_desc T; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldbk, ldbm; int tempmm, tempnn, tempkm, tempkmin; int minMT, minM; int ib = PLASMA_IB; PLASMA_Complex64_t *work; plasma_unpack_args_7(side, trans, A, B, T, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; if (side != PlasmaLeft) { plasma_request_fail(sequence, request, PLASMA_ERR_NOT_SUPPORTED); return; } if (trans != PlasmaConjTrans) { plasma_request_fail(sequence, request, PLASMA_ERR_NOT_SUPPORTED); return; } work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp); ss_init(B.mt, B.nt, min(A.mt, A.nt)); if (A.m > A.n) { minM = A.n; minMT = A.nt; } else { minM = A.m; minMT = A.mt; } k = minMT-1; n = PLASMA_RANK; while (n >= B.nt) { k--; n = n-B.nt; } m = B.mt-1; while (k >= 0 && n < B.nt) { next_n = n; next_m = m; next_k = k; next_m--; if (next_m == k-1) { next_n += PLASMA_SIZE; while (next_n >= B.nt && next_k >= 0) { next_k--; next_n = next_n-B.nt; } next_m = B.mt-1; } tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb; tempkm = k == B.mt-1 ? B.m-k*B.mb : B.mb; tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); ldbm = BLKLDD(B, m); if (m == k) { CORE_zunmlq( side, trans, tempkm, tempnn, tempkmin, ib, A(k, k), ldak, T(k, k), T.mb, B(k, n), ldbk, work, T.nb); ss_cond_set(k, n, k); } else { ss_cond_wait(m, n, k+1); CORE_ztsmlq( side, trans, A.mb, tempnn, tempmm, tempnn, tempkmin, ib, B(k, n), ldbk, B(m, n), ldbm, A(k, m), ldak, T(k, m), T.mb, work, ib); ss_cond_set(m, n, k); } m = next_m; n = next_n; k = next_k; } plasma_private_free(plasma, work); ss_finalize(); }
/***************************************************************************//**
 *  Parallel tile Cholesky factorization - static scheduling
 *
 *  Unpacks (uplo, A, sequence, request) from the context and returns at once
 *  when sequence->status is not PLASMA_SUCCESS.
 *
 *  Each thread starts at tile index m = PLASMA_RANK and advances by
 *  PLASMA_SIZE, walking n = 0..k for step k.  The loop also stops as soon as
 *  ss_aborted() reports true.  Every kernel has a PlasmaLower form and a
 *  PlasmaUpper form (taken when uplo is anything other than PlasmaLower)
 *  that addresses the transposed tile.
 *
 *  Kernel selection per (k, m, n):
 *    m == k, n == k : CORE_spotrf on A(k,k); a nonzero info fails the request
 *                     with info + A.nb*k and calls ss_abort()
 *    m == k, n != k : CORE_ssyrk update of A(k,k)
 *    m != k, n == k : CORE_strsm against A(k,k)
 *    m != k, n != k : CORE_sgemm update
 *
 *  ss_cond_wait()/ss_cond_set() with value 1 order the kernels; only the
 *  CORE_spotrf and CORE_strsm branches call ss_cond_set().
 **/
void plasma_pspotrf(plasma_context_t *plasma) { PLASMA_enum uplo; PLASMA_desc A; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldam, ldan; int info; int tempkn, tempmn; float zone = (float) 1.0; float mzone = (float)-1.0; plasma_unpack_args_4(uplo, A, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; ss_init(A.nt, A.nt, 0); k = 0; m = PLASMA_RANK; while (m >= A.nt) { k++; m = m-A.nt+k; } n = 0; while (k < A.nt && m < A.nt && !ss_aborted()) { next_n = n; next_m = m; next_k = k; next_n++; if (next_n > next_k) { next_m += PLASMA_SIZE; while (next_m >= A.nt && next_k < A.nt) { next_k++; next_m = next_m-A.nt+next_k; } next_n = 0; } tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempmn = m == A.nt-1 ? A.n-m*A.nb : A.nb; ldak = BLKLDD(A, k); ldan = BLKLDD(A, n); ldam = BLKLDD(A, m); if (m == k) { if (n == k) { /* * PlasmaLower */ if (uplo == PlasmaLower) { CORE_spotrf( PlasmaLower, tempkn, A(k, k), ldak, &info); } /* * PlasmaUpper */ else { CORE_spotrf( PlasmaUpper, tempkn, A(k, k), ldak, &info); } if (info != 0) { plasma_request_fail(sequence, request, info + A.nb*k); ss_abort(); } ss_cond_set(k, k, 1); } else { ss_cond_wait(k, n, 1); /* * PlasmaLower */ if (uplo == PlasmaLower) { CORE_ssyrk( PlasmaLower, PlasmaNoTrans, tempkn, A.nb, -1.0, A(k, n), ldak, 1.0, A(k, k), ldak); } /* * PlasmaUpper */ else { CORE_ssyrk( PlasmaUpper, PlasmaTrans, tempkn, A.nb, -1.0, A(n, k), ldan, 1.0, A(k, k), ldak); } } } else { if (n == k) { ss_cond_wait(k, k, 1); /* * PlasmaLower */ if (uplo == PlasmaLower) { CORE_strsm( PlasmaRight, PlasmaLower, PlasmaTrans, PlasmaNonUnit, tempmn, A.nb, zone, A(k, k), ldak, A(m, k), ldam); } /* * PlasmaUpper */ else { CORE_strsm( PlasmaLeft, PlasmaUpper, PlasmaTrans, PlasmaNonUnit, A.nb, tempmn, zone, A(k, k), ldak, A(k, m), ldak); } 
ss_cond_set(m, k, 1); } else { ss_cond_wait(k, n, 1); ss_cond_wait(m, n, 1); /* * PlasmaLower */ if (uplo == PlasmaLower) { CORE_sgemm( PlasmaNoTrans, PlasmaTrans, tempmn, A.nb, A.nb, mzone, A(m, n), ldam, A(k, n), ldak, zone, A(m, k), ldam); } /* * PlasmaUpper */ else { CORE_sgemm( PlasmaTrans, PlasmaNoTrans, A.nb, tempmn, A.nb, mzone, A(n, k), ldan, A(n, m), ldan, zone, A(k, m), ldak); } } } n = next_n; m = next_m; k = next_k; } ss_finalize(); }
/***************************************************************************//**
 *  Parallel tile LQ factorization - static scheduling
 *
 *  Unpacks (A, T, sequence, request) from the context and returns at once
 *  when sequence->status is not PLASMA_SUCCESS.  Two private workspaces are
 *  allocated (work: ib*T.nb elements, tau: A.nb elements) and released
 *  before ss_finalize().
 *
 *  Each thread starts at tile row m = PLASMA_RANK and advances by
 *  PLASMA_SIZE tile rows, walking tile columns n = k..A.nt-1 for step k.
 *
 *  Kernel selection per (k, m, n):
 *    m == k, n == k : CORE_cgelqt on A(k,k)
 *    m == k, n != k : CORE_ctslqt on A(k,k) / A(k,n)
 *    m != k, n == k : CORE_cunmlq on A(m,k)  (no ss_cond_set afterwards)
 *    m != k, n != k : CORE_ctsmlq on A(m,k) / A(m,n)
 *
 *  ss_cond_wait()/ss_cond_set() on tile coordinates order the kernels.
 **/
void plasma_pcgelqf(plasma_context_t *plasma) { PLASMA_desc A; PLASMA_desc T; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldam; int tempkm, tempkn, tempmm, tempnn; int ib = PLASMA_IB; PLASMA_Complex32_t *work, *tau; plasma_unpack_args_4(A, T, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; work = (PLASMA_Complex32_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp); tau = (PLASMA_Complex32_t*)plasma_private_alloc(plasma, A.nb, A.dtyp); ss_init(A.mt, A.nt, -1); k = 0; m = PLASMA_RANK; while (m >= A.mt) { k++; m = m-A.mt+k; } n = k; while (k < min(A.mt, A.nt) && m < A.mt) { next_m = m; next_n = n; next_k = k; next_n++; if (next_n == A.nt) { next_m += PLASMA_SIZE; while (next_m >= A.mt && next_k < min(A.nt, A.mt)) { next_k++; next_m = next_m-A.mt+next_k; } next_n = next_k; } tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; tempnn = n == A.nt-1 ? 
A.n-n*A.nb : A.nb; ldak = BLKLDD(A, k); ldam = BLKLDD(A, m); if (m == k) { if (n == k) { ss_cond_wait(k, k, k-1); CORE_cgelqt( tempkm, tempkn, ib, A(k, k), ldak, T(k, k), T.mb, tau, work); ss_cond_set(k, k, k); } else { ss_cond_wait(k, n, k-1); CORE_ctslqt( tempkm, tempnn, ib, A(k, k), ldak, A(k, n), ldak, T(k, n), T.mb, tau, work); ss_cond_set(k, n, k); } } else { if (n == k) { ss_cond_wait(k, k, k); ss_cond_wait(m, k, k-1); CORE_cunmlq( PlasmaRight, PlasmaConjTrans, tempmm, tempkn, tempkn, ib, A(k, k), ldak, T(k, k), T.mb, A(m, k), ldam, work, T.nb); } else { ss_cond_wait(k, n, k); ss_cond_wait(m, n, k-1); CORE_ctsmlq( PlasmaRight, PlasmaConjTrans, tempmm, A.nb, tempmm, tempnn, A.nb, ib, A(m, k), ldam, A(m, n), ldam, A(k, n), ldak, T(k, n), T.mb, work, T.nb); ss_cond_set(m, n, k); } } m = next_m; n = next_n; k = next_k; } plasma_private_free(plasma, work); plasma_private_free(plasma, tau); ss_finalize(); }
/***************************************************************************//**
 *  Parallel forward substitution for tile LU - static scheduling
 *
 *  Unpacks (A, B, L, IPIV, sequence, request) from the context and returns
 *  at once when sequence->status is not PLASMA_SUCCESS.
 *
 *  Each thread starts at tile column n = PLASMA_RANK of B and advances by
 *  PLASMA_SIZE tile columns, walking tile rows m = k..A.mt-1 for step k,
 *  with k < min(A.mt, A.nt).
 *
 *  Kernel selection per (k, m, n):
 *    m == k : CORE_zgessm on B(k,n) using A(k,k) and IPIV(k,k)
 *    m != k : CORE_zssssm on B(k,n) / B(m,n) using L(m,k), A(m,k), IPIV(m,k)
 *
 *  Each kernel waits for (m, n, k-1) and then publishes (m, n, k) through
 *  ss_cond_wait()/ss_cond_set().
 **/
void plasma_pztrsmpl(plasma_context_t *plasma) { PLASMA_desc A; PLASMA_desc B; PLASMA_desc L; int *IPIV; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldbk, ldam, ldbm; int tempkm, tempnn, tempkmin, tempmm, tempkn; int ib; plasma_unpack_args_6(A, B, L, IPIV, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; ss_init(B.mt, B.nt, -1); ib = PLASMA_IB; k = 0; n = PLASMA_RANK; while (n >= B.nt) { k++; n = n-B.nt; } m = k; while (k < min(A.mt, A.nt) && n < B.nt) { next_n = n; next_m = m; next_k = k; next_m++; if (next_m == A.mt) { next_n += PLASMA_SIZE; while (next_n >= B.nt && next_k < min(A.mt, A.nt)) { next_k++; next_n = next_n-B.nt; } next_m = next_k; } tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb; tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); if (m == k) { ss_cond_wait(k, n, k-1); CORE_zgessm( tempkm, tempnn, tempkmin, ib, IPIV(k, k), A(k, k), ldak, B(k, n), ldbk); ss_cond_set(k, n, k); } else { ss_cond_wait(m, n, k-1); CORE_zssssm( A.nb, tempnn, tempmm, tempnn, tempkn, ib, B(k, n), ldbk, B(m, n), ldbm, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); ss_cond_set(m, n, k); } n = next_n; m = next_m; k = next_k; } ss_finalize(); }
/***************************************************************************//**
 *  Parallel tile QR factorization - static scheduling
 *
 *  Unpacks (A, T, sequence, request) from the context and returns at once
 *  when sequence->status is not PLASMA_SUCCESS.  Two private workspaces are
 *  allocated (work: ib*T.nb elements, tau: A.nb elements) and released
 *  before ss_finalize().
 *
 *  Each thread starts at tile column n = PLASMA_RANK and advances by
 *  PLASMA_SIZE tile columns, walking tile rows m = k..A.mt-1 for step k.
 *
 *  Kernel selection per (k, m, n):
 *    n == k, m == k : CORE_dgeqrt on A(k,k)
 *    n == k, m != k : CORE_dtsqrt on A(k,k) / A(m,k)
 *    n != k, m == k : CORE_dormqr on A(k,n)  (no ss_cond_set afterwards)
 *    n != k, m != k : CORE_dtsmqr on A(k,n) / A(m,n)
 *
 *  ss_cond_wait()/ss_cond_set() on tile coordinates order the kernels.
 **/
void plasma_pdgeqrf(plasma_context_t *plasma) { PLASMA_desc A; PLASMA_desc T; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldam; int tempkm, tempkn, tempnn, tempmm; int ib = PLASMA_IB; double *work, *tau; plasma_unpack_args_4(A, T, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; work = (double*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp); tau = (double*)plasma_private_alloc(plasma, A.nb, A.dtyp); ss_init(A.mt, A.nt, -1); k = 0; n = PLASMA_RANK; while (n >= A.nt) { k++; n = n-A.nt+k; } m = k; while (k < min(A.mt, A.nt) && n < A.nt) { next_n = n; next_m = m; next_k = k; next_m++; if (next_m == A.mt) { next_n += PLASMA_SIZE; while (next_n >= A.nt && next_k < min(A.mt, A.nt)) { next_k++; next_n = next_n-A.nt+next_k; } next_m = next_k; } tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; tempmm = m == A.mt-1 ? 
A.m-m*A.mb : A.mb; ldak = BLKLDD(A, k); ldam = BLKLDD(A, m); if (n == k) { if (m == k) { ss_cond_wait(k, k, k-1); CORE_dgeqrt( tempkm, tempkn, ib, A(k, k), ldak, T(k, k), T.mb, tau, work); ss_cond_set(k, k, k); } else { ss_cond_wait(m, k, k-1); CORE_dtsqrt( tempmm, tempkn, ib, A(k, k), ldak, A(m, k), ldam, T(m, k), T.mb, tau, work); ss_cond_set(m, k, k); } } else { if (m == k) { ss_cond_wait(k, k, k); ss_cond_wait(k, n, k-1); CORE_dormqr( PlasmaLeft, PlasmaTrans, tempkm, tempnn, tempkm, ib, A(k, k), ldak, T(k, k), T.mb, A(k, n), ldak, work, T.nb); } else { ss_cond_wait(m, k, k); ss_cond_wait(m, n, k-1); CORE_dtsmqr( PlasmaLeft, PlasmaTrans, A.nb, tempnn, tempmm, tempnn, A.nb, ib, A(k, n), ldak, A(m, n), ldam, A(m, k), ldam, T(m, k), T.mb, work, ib); ss_cond_set(m, n, k); } } n = next_n; m = next_m; k = next_k; } plasma_private_free(plasma, work); plasma_private_free(plasma, tau); ss_finalize(); }
/*
 * Client entry point.
 *
 * Resolves the server and local addresses from ss_opt, initializes the
 * crypto layer and the ss layer, listens on the local address and then
 * serves connections from a poll() loop:
 *   - clients[0] is the listening socket; every accepted connection is added
 *     to the poll set and wrapped in a link whose server is server_ai;
 *   - for every other ready descriptor, readable/writable events are handed
 *     to client_do_pollin()/client_do_pollout();
 *   - reaper() runs after every pass and whenever poll() times out.
 *
 * The loop never ends by itself; a poll() failure goes through err_exit().
 * Start-up failures jump to `out`, release what was acquired and exit with
 * EXIT_FAILURE.
 */
int main(int argc, char **argv)
{
	short revents;
	int i, listenfd, sockfd;
	int ret = 0;
	struct link *ln;
	struct addrinfo *server_ai = NULL;
	struct addrinfo *local_ai = NULL;
	struct addrinfo hint;

	check_ss_option(argc, argv, "client");

	memset(&hint, 0, sizeof(hint));
	hint.ai_family = AF_UNSPEC;
	hint.ai_socktype = SOCK_STREAM;

	ret = getaddrinfo(ss_opt.server_addr, ss_opt.server_port,
			  &hint, &server_ai);
	if (ret != 0) {
		pr_warn("getaddrinfo error: %s\n", gai_strerror(ret));
		/* getaddrinfo() returns an EAI_* code, not -1; normalize it
		 * so the exit path reports failure instead of success. */
		ret = -1;
		goto out;
	}

	pr_ai_notice(server_ai, "server address");

	ret = getaddrinfo(ss_opt.local_addr, ss_opt.local_port,
			  &hint, &local_ai);
	if (ret != 0) {
		pr_warn("getaddrinfo error: %s\n", gai_strerror(ret));
		ret = -1;
		goto out;
	}

	pr_ai_notice(local_ai, "listening address");

	if (crypto_init(ss_opt.password, ss_opt.method) == -1) {
		ret = -1;
		goto out;
	}

	ss_init();
	listenfd = do_listen(local_ai, "tcp");
	clients[0].fd = listenfd;
	clients[0].events = POLLIN;

	while (1) {
		pr_debug("start polling\n");
		ret = poll(clients, nfds, TCP_INACTIVE_TIMEOUT * 1000);
		if (ret == -1)
			err_exit("poll error");
		else if (ret == 0) {
			/* timeout: only housekeeping to do */
			reaper();
			continue;
		}

		/* new incoming connection on the listening socket */
		if (clients[0].revents & POLLIN) {
			sockfd = accept(clients[0].fd, NULL, NULL);
			if (sockfd == -1) {
				pr_warn("accept error\n");
			} else if (poll_set(sockfd, POLLIN) == -1) {
				close(sockfd);
			} else {
				ln = create_link(sockfd, "client");
				if (ln == NULL) {
					poll_del(sockfd);
					close(sockfd);
				} else {
					ln->server = server_ai;
				}
			}
		}

		/* established connections */
		for (i = 1; i < nfds; i++) {
			sockfd = clients[i].fd;
			if (sockfd == -1)
				continue;

			revents = clients[i].revents;
			if (revents == 0)
				continue;

			ln = get_link(sockfd);
			if (ln == NULL) {
				sock_warn(sockfd, "close: can't get link");
				close(sockfd);
				continue;
			}

			if (revents & POLLIN) {
				client_do_pollin(sockfd, ln);
			}

			if (revents & POLLOUT) {
				client_do_pollout(sockfd, ln);
			}

			/* POLLPRI, POLLERR, POLLHUP and POLLNVAL are
			 * deliberately not reported, to suppress the noise. */
		}

		reaper();
	}

out:
	crypto_exit();

	if (server_ai)
		freeaddrinfo(server_ai);

	if (local_ai)
		freeaddrinfo(local_ai);

	ss_exit();

	if (ret == -1)
		exit(EXIT_FAILURE);
	else
		exit(EXIT_SUCCESS);
}