Example #1
0
#include "bli_x86_asm_macros.h"

void bli_sgemm_penryn_asm_8x4
     (
       dim_t               k0,
       float*     restrict alpha,
       float*     restrict a,
       float*     restrict b,
       float*     restrict beta,
       float*     restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*   a_next = bli_auxinfo_next_a( data );
	void*   b_next = bli_auxinfo_next_b( data );

	// Store the integer arguments in 64-bit locals in case dim_t and inc_t
	// are a different size than the load instructions expect. The k0
	// dimension is split into k_iter (k0 / 4) and k_left (k0 % 4).
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()
		
		
		mov(var(a), rax) // load address of a.
		mov(var(b), rbx) // load address of b.
		mov(var(b_next), r9) // load address of b_next.
		
Example #2
0
//#define MONITORS
//#define LOOPMON
void bli_dgemm_asm_30x8
     (
       dim_t               k,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
    double * a_next = bli_auxinfo_next_a( data );
    double * b_next = bli_auxinfo_next_b( data );

    int * offsetPtr = &offsets[0];

    uint64_t k64 = k;

#ifdef MONITORS
    int toph, topl, both, botl, midl, midh, mid2l, mid2h;
#endif
#ifdef LOOPMON
    int tlooph, tloopl, blooph, bloopl;
#endif
    
    __asm
    {
#ifdef MONITORS