/*
 * Square matrix-multiply driver for M-by-M operands.
 *
 * Problems with M <= 100 are handed straight to naive_square_dgemm().
 * Larger ones are walked tile by tile in steps of BLOCK_SIZE; every tile
 * triple (i, j, k) is passed to do_block() together with three
 * zero-initialised BLOCK_SIZE * BLOCK_SIZE scratch arrays.
 *
 * NOTE(review): how do_block() uses the scratch arrays and how it treats a
 * ragged last tile is not visible here.
 */
void square_dgemm(const int M, const double *A, const double *B, double *C)
{
    if (M <= 100) {
        naive_square_dgemm(M, A, B, C);
        return;
    }

    /* Tiles per dimension; one extra when BLOCK_SIZE does not divide M. */
    int tiles = M / BLOCK_SIZE;
    if (M % BLOCK_SIZE) {
        ++tiles;
    }

    /* Scratch storage shared by every do_block() call. */
    double aa[BLOCK_SIZE * BLOCK_SIZE] = {0};
    double bb[BLOCK_SIZE * BLOCK_SIZE] = {0};
    double cc[BLOCK_SIZE * BLOCK_SIZE] = {0};

    for (int ti = 0; ti < tiles; ++ti) {
        const int i = ti * BLOCK_SIZE;

        for (int tj = 0; tj < tiles; ++tj) {
            const int j = tj * BLOCK_SIZE;

            for (int tk = 0; tk < tiles; ++tk) {
                const int k = tk * BLOCK_SIZE;

                do_block(M, A, B, C, aa, bb, cc, i, j, k);
            }
        }
    }
}
long do_sched_op_compat(int cmd, unsigned long arg) { long ret = 0; switch ( cmd ) { case SCHEDOP_yield: { ret = do_yield(); break; } case SCHEDOP_block: { ret = do_block(); break; } case SCHEDOP_shutdown: { TRACE_3D(TRC_SCHED_SHUTDOWN, current->domain->domain_id, current->vcpu_id, arg); domain_shutdown(current->domain, (u8)arg); break; } default: ret = -ENOSYS; } return ret; }
/*
 * dgemm driver:  C := C + A * B
 * A, B and C are lda-by-lda matrices in column-major storage; A and B are
 * left unmodified.
 *
 * The matrices are processed in tiles.  For each slab of the inner dimension
 * a panel of B is copied into scratch storage, then each tile of A and of C
 * is copied, multiplied via do_block() and merged back with add_block().
 * Tile extents are passed through turn_even() before being given to
 * do_block().
 *
 * NOTE(review): the scratch arrays have fixed capacity and nothing in this
 * routine checks lda against them -- confirm the largest lda callers use.
 */
void square_dgemm(int lda, double* A, double* B, double* C)
{
    enum { ROW_TILE = 222, COL_TILE = 12, INNER_TILE = 222 };

    double packed_a[50000];
    double packed_b[200000];
    double packed_c[4000];

    for (int k0 = 0; k0 < lda; k0 += INNER_TILE) {
        const int depth = min(INNER_TILE, lda - k0);

        /* Copy the slab of B that starts at row k0. */
        copy_block(lda, depth, lda, B + k0, packed_b);
        const int depth_even = turn_even(depth);

        for (int i0 = 0; i0 < lda; i0 += ROW_TILE) {
            const int rows = min(ROW_TILE, lda - i0);

            /* Copy the matching tile of A. */
            copy_block(lda, rows, depth, A + i0 + k0 * lda, packed_a);
            const int rows_even = turn_even(rows);

            for (int j0 = 0; j0 < lda; j0 += COL_TILE) {
                const int cols = min(COL_TILE, lda - j0);
                const int cols_even = turn_even(cols);
                double *c_tile = C + i0 + j0 * lda;

                copy_block(lda, rows, cols, c_tile, packed_c);
                do_block(rows_even, depth_even, cols_even,
                         packed_a, packed_b + j0 * depth_even, packed_c);
                add_block(packed_c, c_tile, rows, cols, lda, rows_even);
            }
        }
    }
}
/*
 * Emit one word together with its translation and selma'o tag, in the format
 * selected by the global ofmt.
 *
 * Note the argument order handed to the formatters is (lojban, selmao, trans),
 * which differs from this function's own parameter order.
 * Formats not listed below produce no output.
 */
void output(const char *lojban, const char *trans, const char *selmao)
{
    switch (ofmt) {
    case OF_LATEX:
        printf ("\\begin{tabular}[t]{l}"
                "\\textbf{\\footnotesize %s}\\\\\n"
                "\\textrm{\\footnotesize %s}\\\\\n"
                "\\textit{\\footnotesize %s}\n"
                "\\end{tabular}\n"
                "\\rule{0in}{1.0\\baselineskip}",
                lojban, selmao, trans);
        return;

    case OF_TEXT:
        printf ("%s <%s> [%s] ", lojban, selmao, trans);
        return;

    case OF_TEXTBLK:
        do_block(lojban, selmao, trans);
        return;

#ifdef PLIST
    case OF_PLIST:
        /* Accumulate the pair into the global property-list dictionary. */
        dictionary = PLInsertDictionaryEntry(dictionary,
                                             PLMakeString(lojban),
                                             PLMakeString(trans));
        return;
#endif /* PLIST */

    default:
        return;
    }
}
/*
 * Blocked driver over M-by-M operands: visits every (i, j, k) tile origin in
 * steps of BLOCK_SIZE and lets do_block() handle the tile.
 */
void square_dgemm( int M, double *A, double *B, double *C )
{
    int row, col, depth;

    for( row = 0; row < M; row += BLOCK_SIZE ) {
        for( col = 0; col < M; col += BLOCK_SIZE ) {
            for( depth = 0; depth < M; depth += BLOCK_SIZE ) {
                do_block( M, A, B, C, row, col, depth );
            }
        }
    }
}
/* Generate code for one statement node, writing to STREAM.
 *
 * The first element of NODE (node[0]) is an operator code; it is compared
 * against multi-character constants and the matching handler is called.
 * Anything that is not a recognised statement kind is handed to expr_code().
 *
 * BRK, CONT and RET are forwarded unchanged to the handlers; BRK and CONT are
 * the targets given to branch() for 'brea' and 'cont' nodes, and RET is given
 * to return_stmt().  (Presumably these are label identifiers -- confirm
 * against the callers.)
 */
static stmt_code(stream, node, brk, cont, ret) {
    /* Handle the null expression. */
    if ( !node ) return;

    auto op = node[0];

    /* For debugging purposes, put a blank line between each statement. */
    fputs("\n", stream);

    /* Dispatch on the operator code of the node. */
    if ( op == 'dcls' ) declaration( stream, node );
    else if ( op == 'brea' ) branch( stream, brk );
    else if ( op == 'cont' ) branch( stream, cont );
    else if ( op == 'retu' ) return_stmt( stream, node, ret );
    else if ( op == 'goto' ) goto_stmt( stream, node );
    else if ( op == 'if' ) if_stmt( stream, node, brk, cont, ret );
    else if ( op == 'whil' ) while_stmt( stream, node, brk, cont, ret );
    else if ( op == 'for' ) for_stmt( stream, node, brk, cont, ret );
    else if ( op == 'do' ) do_stmt( stream, node, brk, cont, ret );
    else if ( op == 'swit' ) switch_stmt( stream, node, brk, cont, ret );
    else if ( op == 'case' || op == 'defa' ) case_stmt( stream, node, brk, cont, ret );
    else if ( op == ':' ) label_stmt( stream, node, brk, cont, ret );
    else if ( op == '{}' ) do_block( stream, node, brk, cont, ret );
    /* Fallback: treat the node as an expression statement. */
    else expr_code( stream, node, 0 );
}
/*
 * Emit a parenthesised remark in the format selected by the global ofmt.
 *
 * In text-block mode the opening bracket, the text and the closing bracket
 * are each sent to do_block() as separate entries.
 * Formats not listed below produce no output.
 */
void output_paren(const char *text)
{
    switch (ofmt) {
    case OF_LATEX:
        printf ("\\textrm{\\footnotesize %s}", text);
        return;

    case OF_TEXT:
        printf ("(%s) ", text);
        return;

    case OF_TEXTBLK:
        do_block("(", "(", "(");
        do_block(text, "", "");
        do_block(")", ")", ")");
        return;

    default:
        return;
    }
}
/* Generate code for a whole function definition, writing to STREAM.
 *
 * The sequence is: obtain a fresh label, start_fn(), emit the prologue,
 * generate the body via do_block(), emit the fresh label, emit the epilogue,
 * end_fn().  The label is passed to do_block() as its last argument and is
 * emitted immediately before the epilogue.
 *
 * NOTE(review): -1 is passed for the third and fourth do_block() arguments;
 * presumably this means "no enclosing break/continue target" -- confirm
 * against do_block().
 */
static fn_decl(stream, name, decl, block, frame_sz) {
    /* Label placed just ahead of the epilogue. */
    auto ret = new_label();

    start_fn(decl);
    prolog(stream, name, frame_sz);
    do_block(stream, block, -1, -1, ret);
    emit_label( stream, ret );
    epilog(stream, frame_sz);
    end_fn();
}
/**
 * Run do_block() over the container, one m_blocklen-sized chunk at a time.
 *
 * The chunk count is data.size() / m_blocklen, so any remainder past the
 * last whole chunk is never passed to do_block().
 */
virtual void cipher(block_cipher::g_type::container_type & data)
{
    int blocks = data.size() / m_blocklen;
    data_type * cursor = data.data();

    for (int remaining = blocks; remaining > 0; --remaining)
    {
        do_block(cursor);
        cursor += m_blocklen;
    }
}
/*
 * Process input arriving on standard input.
 *
 * For BMZ_A_LIST the work is delegated to do_list() on descriptor 0.
 * For every other action the stream is read via read_from_fp() and the
 * resulting buffer is handed to do_block() with the caller's parameters.
 */
static void input_from_stdin(size_t offset, size_t fp_len, int action, int options)
{
    if (action == BMZ_A_LIST) {
        do_list(0);
        return;
    }

    size_t len, buf_len;
    void *data = read_from_fp(stdin, &len, &buf_len);

    do_block(data, len, buf_len, offset, fp_len, action, options);
}
/*
 * Pack `datalen' bytes starting at `original' and return the packed buffer.
 *
 * The output buffer is obtained with malloc() and stored in the file-level
 * variable `packed', which is also the return value.  On allocation failure
 * the process prints a message with perror() and exits with status 1.
 * The final value of `packedpos' is stored through `packlen'.
 *
 * The routine works through file-level state (packed, packedlen, packedpos,
 * bitpos, bitcount, bitbuf, blk, blklen, blkstart, bpos, hashp, ntuple,
 * tuples) shared with do_block(), write_block(), write_bits() and bwrite().
 */
/*static*/ void *rnc_pack (void *original, long datalen, long *packlen) {
    int i;
    char *data = original;
    long origlen = datalen;

    packed = malloc(PACKED_DELTA);
    if (!packed) {
        perror("malloc");
        exit(1);
    }
    packedlen = PACKED_DELTA;
    /* Output starts at offset 20; the bytes before it are filled in below
     * and via bitpos. */
    packedpos = 20;
    bwrite (packed+4, datalen);

    /* Reset the bit writer, then emit two zero bits. */
    bitpos = 18;
    bitcount = 0;
    bitbuf = 0;
    write_bits (0, 2);

    while (datalen > 0) {
        /* Take at most BLOCKMAX new bytes this round. */
        blklen = datalen > BLOCKMAX ? BLOCKMAX : datalen;
        /* Prefix the block with up to WINMAX - BLOCKMAX bytes of input that
         * precede `data', limited to what has already been consumed. */
        blkstart = WINMAX - BLOCKMAX;
        if (blkstart > origlen-datalen)
            blkstart = origlen-datalen;
        memcpy (blk, data-blkstart, blkstart+blklen);

        /* Clear the hash table and start a fresh tuple list. */
        for (i=0; i<HASHMAX; i++)
            hashp[i] = -1;
        ntuple = 0;
        tuples[ntuple].rawlen = 0;
        blklen += blkstart;

        do_block();

        /* do_block() leaves its stopping position in bpos; advance by the
         * number of new (non-prefix) bytes it covered. */
        data += bpos - blkstart;
        datalen -= bpos - blkstart;
        write_block();
    }

    if (bitcount > 0) {
        write_bits (0, 17-bitcount); /* force flush */
        packedpos -= 2; /* write_bits will have moved it on */
    }

    *packlen = packedpos;

    /* Fill in the header fields.
     * NOTE(review): the writes at offsets 12, 10 and 8 may overlap if
     * bwrite() stores more than two bytes, in which case their order is
     * significant -- confirm before reordering. */
    bwrite (packed, RNC_SIGNATURE);
    bwrite (packed+12, rnc_crc(packed+18, packedpos-18));
    bwrite (packed+10, rnc_crc(original, origlen));
    bwrite (packed+8, packedpos-18);
    packed[16] = packed[17] = 0;

    return packed;
}
/*
 * Blocked driver over M-by-M operands.
 *
 * The matrix is cut into ceil(M / BLOCK_SIZE) tiles per dimension and
 * do_block() is invoked once for every (i, j, k) tile origin.
 */
void square_dgemm(const int M, const double *A, const double *B, double *C)
{
    /* Tiles per dimension; one extra when BLOCK_SIZE does not divide M. */
    int tile_count = M / BLOCK_SIZE;
    if (M % BLOCK_SIZE) {
        ++tile_count;
    }

    for (int row_tile = 0; row_tile < tile_count; ++row_tile) {
        const int i = row_tile * BLOCK_SIZE;

        for (int col_tile = 0; col_tile < tile_count; ++col_tile) {
            const int j = col_tile * BLOCK_SIZE;

            for (int inner_tile = 0; inner_tile < tile_count; ++inner_tile) {
                const int k = inner_tile * BLOCK_SIZE;

                do_block(M, A, B, C, i, j, k);
            }
        }
    }
}
void square_dgemm (int lda, double* A, double* B, double* C) { double *A_block, *B_block; posix_memalign((void **)&A_block, 64, BLOCK_L1 * BLOCK_L1 * sizeof(double)); posix_memalign((void **)&B_block, 64, BLOCK_L1 * BLOCK_L1 * sizeof(double)); // reorcder loops for cache efficiency for (int t = 0; t < lda; t += BLOCK_L2) { // For each block column of B for (int s = 0; s < lda; s += BLOCK_L2) { // For each block row of A for (int r = 0; r < lda; r += BLOCK_L2) { // compute end index of smaller block int end_k = t + min(BLOCK_L2, lda-t); int end_j = s + min(BLOCK_L2, lda-s); int end_i = r + min(BLOCK_L2, lda-r); for (int k = t; k < end_k; k += BLOCK_L1) { // For each block column of B for (int j = s; j < end_j; j += BLOCK_L1) { // For each block row of A for (int i = r; i < end_i; i += BLOCK_L1) { // compute block size int K = min(BLOCK_L1, end_k-k); int N = min(BLOCK_L1, end_j-j); int M = min(BLOCK_L1, end_i-i); /* Performs a smaller dgemm operation * C' := C' + A' * B' * where C' is M-by-N, A' is M-by-K, and B' is K-by-N. */ //do_block(lda, M, N, K, A + i + k*lda, B + k + j*lda, C + i + j*lda); do_block(lda, M, N, K, A + i + k*lda, B + k + j*lda, C + i + j*lda, A_block, B_block); } } } } } } free(A_block); free(B_block); }
/*
 * Second-level blocking step: splits a BLOCK2_SIZE tile whose origin is
 * (i, j, k) into BLOCK3_SIZE sub-tiles and calls do_block() on each one.
 *
 * The sub-tile origins are not clamped against lda here.
 */
void do_block2(const int lda, const double * A, const double * B, double * C,
               const int i, const int j, const int k)
{
    /* Sub-tiles per dimension; one extra when the sizes do not divide. */
    int sub_tiles = BLOCK2_SIZE / BLOCK3_SIZE;
    if (BLOCK2_SIZE % BLOCK3_SIZE) {
        ++sub_tiles;
    }

    for (int ri = 0; ri < sub_tiles; ++ri) {
        const int r = ri * BLOCK3_SIZE;

        for (int si = 0; si < sub_tiles; ++si) {
            const int s = si * BLOCK3_SIZE;

            for (int ti = 0; ti < sub_tiles; ++ti) {
                const int t = ti * BLOCK3_SIZE;

                do_block(lda, A, B, C, i + r, j + s, k + t);
            }
        }
    }
}
/*
 * Process the file named fname.
 *
 * The file is opened read-only; DIE() is invoked if that fails.
 * For BMZ_A_LIST the descriptor is passed to do_list(); for every other
 * action the contents are read via read_from_fd() and handed to do_block().
 */
static void input_from_file(const char *fname, size_t offset, size_t fp_len, int action, int options)
{
    const int fd = open(fname, O_RDONLY, 0);

    if (fd == -1) {
        DIE("cannot open '%s'", fname);
    }

    if (action == BMZ_A_LIST) {
        do_list(fd);
        return;
    }

    size_t len, buf_len;
    void *data = read_from_fd(fd, &len, &buf_len);

    do_block(data, len, buf_len, offset, fp_len, action, options);

    /* close and free etc. are omitted intentionally */
}
/*
 * Blocked dgemm:  C := C + A * B
 * A, B and C are lda-by-lda, stored column-major.  A and B are not modified.
 *
 * The matrices are swept in BLOCK_SIZE tiles; tiles on the trailing edge are
 * clipped so they never extend past the matrix.
 */
void square_dgemm (int lda, double* A, double* B, double* C)
{
  /* Block rows of A */
  for (int i = 0; i < lda; i += BLOCK_SIZE)
  {
    const int M = min (BLOCK_SIZE, lda-i);

    /* Block columns of B */
    for (int j = 0; j < lda; j += BLOCK_SIZE)
    {
      const int N = min (BLOCK_SIZE, lda-j);
      double* c_tile = C + i + j*lda;

      /* Accumulate the partial products for this tile of C */
      for (int k = 0; k < lda; k += BLOCK_SIZE)
      {
        const int K = min (BLOCK_SIZE, lda-k);

        do_block(lda, M, N, K, A + i + k*lda, B + k + j*lda, c_tile);
      }
    }
  }
}
void block_sigio(void) { PRINT_TIME(NOFD, &tnow, &tprev, "block_sigio: entered sigio_blocked = %d", sigio_blocked); if (sigio_blocked == 1) { PRINT_TIME(NOFD, &tnow, &tprev, "block_sigio: is already blocked returning"); return; } DEBG(MSG_INTR, "block_sigio: blocking\n"); do_block(); num_block_sigio++; DEBG(MSG_INTR, "block_sigio: unblocked = %15d blocked = %15d\n", num_unblock_sigio, num_block_sigio); DEBG(MSG_INTR, "block_sigio: setting sigio_blocked = 1\n"); sigio_blocked = 1; DEBG(MSG_INTR, "block_sigio: done setting sigio_blocked = 1\n"); PRINT_TIME(NOFD, &tnow, &tprev, "block_sigio: returning"); }
/* Thin wrapper: calls do_block() with an argument of 1.
 * NOTE(review): presumably 1 selects "block" -- confirm against do_block(). */
void block_all_signals() { do_block(1); }
/*
 * Accept a buffer pushed to this element and write it out in units of the
 * device block size.
 *
 * elt - the transfer element; cast to XferDestDevice for its state
 * buf - the pushed data, or NULL to signal EOF
 * len - number of bytes in buf
 *
 * On EOF, any data held in the partial-block buffer is written as one block
 * and device_finish_file() is called.
 *
 * Otherwise the data is first used to top up the partial-block buffer, then
 * whole blocks are written straight from buf, and any tail shorter than one
 * block is kept in the partial-block buffer for the next call.
 *
 * A non-NULL buf is released with g_free() on every exit path, including the
 * path where the partial-block buffer cannot be allocated.
 *
 * A false return from do_block() ends processing immediately.
 */
static void push_buffer_impl( XferElement *elt, gpointer buf, size_t len)
{
    XferDestDevice *self = XFER_DEST_DEVICE(elt);
    /* buf is advanced below, so remember the original pointer for g_free() */
    gpointer to_free = buf;

    /* Handle EOF */
    if (!buf) {
        /* write out the partial buffer, if there's anything in it */
        if (self->partial_length) {
            if (!do_block(self, self->block_size, self->partial)) {
                return;
            }
            self->partial_length = 0;
        }

        device_finish_file(self->device);
        return;
    }

    /* set up the block buffer, now that we can depend on having a blocksize
     * from the device */
    if (!self->partial) {
        self->partial = g_try_malloc(self->device->block_size);
        if (self->partial == NULL) {
            xfer_cancel_with_error(elt, "%s: Cannot allocate memory",
                                   self->device->device_name);
            wait_until_xfer_cancelled(elt->xfer);
            /* release the pushed buffer here too, as on the other exits */
            g_free(to_free);
            return;
        }
        self->block_size = self->device->block_size;
        self->partial_length = 0;
    }

    /* if we already have data in the buffer, add the new data to it */
    if (self->partial_length != 0) {
        gsize to_copy = min(self->block_size - self->partial_length, len);

        memmove((char *)self->partial + self->partial_length, buf, to_copy);
        buf = (gpointer)(to_copy + (char *)buf);
        len -= to_copy;
        self->partial_length += to_copy;
    }

    /* and if the buffer is now full, write the block */
    if (self->partial_length == self->block_size) {
        if (!do_block(self, self->block_size, self->partial)) {
            g_free(to_free);
            return;
        }
        self->partial_length = 0;
    }

    /* write any whole blocks directly from the push buffer */
    while (len >= self->block_size) {
        if (!do_block(self, self->block_size, buf)) {
            g_free(to_free);
            return;
        }

        buf = (gpointer)(self->block_size + (char *)buf);
        len -= self->block_size;
    }

    /* and finally store any leftover data in the partial buffer */
    if (len) {
        memmove(self->partial, buf, len);
        self->partial_length = len;
    }

    g_free(to_free);
}
/* Thin wrapper: calls do_block() with an argument of 1.
 * NOTE(review): presumably 1 selects "block" -- confirm against do_block(). */
void gnupg_block_all_signals () { do_block(1); }
/* Thin wrapper: calls do_block() with an argument of 0.
 * NOTE(review): presumably 0 selects "unblock" -- confirm against do_block(). */
void gnupg_unblock_all_signals () { do_block(0); }
/* Thin wrapper: calls do_block() with an argument of 0.
 * NOTE(review): presumably 0 selects "unblock" -- confirm against do_block(). */
void unblock_all_signals() { do_block(0); }