示例#1
0
    static void apply(V alpha, const matrix &A, const Vec1 &x,
            V beta, Vec2 &y)
    {
        const size_t nb  = A.brows;
        const size_t na  = A.nrows;
        const size_t ma  = A.ncols;
        const size_t b1 = A.block_size;
        const size_t b2 = b1 * b1;

        if (beta) {
            if (beta != 1) {
#pragma omp parallel for
                for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(na); ++i) {
                    y[i] *= beta;
                }
            }
        } else {
            backend::clear(y);
        }

#pragma omp parallel for
        for(ptrdiff_t ib = 0; ib < static_cast<ptrdiff_t>(nb); ++ib) {
            for(P jb = A.ptr[ib], eb = A.ptr[ib + 1]; jb < eb; ++jb) {
                size_t x0 = A.col[jb] * b1;
                size_t y0 = ib * b1;
                block_prod(b1, std::min(b1, ma - x0), std::min(b1, na - y0),
                        alpha, &A.val[jb * b2], &x[x0], &y[y0]
                        );
            }
        }
    }
示例#2
0
文件: timeblockmm.c 项目: di/school
main(int argc, char *argv[])
{
float **a,**b,**c;
int n,n1,n2;
int i,j;
//double t0,t1;
struct timeval t0,t1;
long mtime, seconds, useconds;

// Using PAPI - from countloop.c
if (PAPI_VER_CURRENT !=
    PAPI_library_init(PAPI_VER_CURRENT))
    ehandler("PAPI_library_init error.");

const size_t EVENT_MAX = PAPI_num_counters();
// Suppressing output
//    printf("# Max counters = %zd\n", EVENT_MAX);

if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
    ehandler("Cannot count PAPI_TOT_INS.");

if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
    ehandler("Cannot count PAPI_FP_OPS.");

if (PAPI_OK != PAPI_query_event(PAPI_L1_DCM))
    ehandler("Cannot count PAPI_L1_DCM.");

size_t EVENT_COUNT = 3;
int events[] = { PAPI_TOT_INS, PAPI_FP_OPS, PAPI_L1_DCM };
long long values[EVENT_COUNT];

// Take size from args, not prompt
// printf("Enter n:  ");  scanf("%d",&n);  printf("n = %d\n",n);
n = atoi(argv[1]);

//printf("Enter n1:  ");  scanf("%d",&n1);  printf("n1 = %d\n",n1);
//printf("Enter n2:  ");  scanf("%d",&n2);  printf("n2 = %d\n",n2);

// To conform to the other matrix functions
n1 = floor(sqrt(n));
n2 = n1;
n = n1*n2;
//printf("n = %d X %d = %d\n",n1,n2,n);
a = matrix(1,n,1,n);
for (i=1;i<=n;i++) 
    for (j=1;j<=n;j++) 
        a[i][j] = i+j;

b = matrix(1,n,1,n);
for (i=1;i<=n;i++) 
    for (j=1;j<=n;j++) 
        b[i][j] = i-j;

//#ifdef PRINT
//print_matrix(a,1,n,1,n);
//printf("\n"); */
//print_matrix(b,1,n,1,n);
//printf("\n"); */
//#endif

//t0 = get_seconds();
//c = matrix_prod(n,n,n,n,a,b);
//t1 = get_seconds();
//printf("Time for matrix_prod = %f sec\n",t1-t0);

//t0 = get_seconds();
gettimeofday(&t0, NULL);
// Start PAPI
PAPI_start_counters(events, EVENT_COUNT);

if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
    ehandler("Problem reading counters.");

c = block_prod(n1,n1,n1,n2,n2,n2,a,b);

if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
    ehandler("Problem reading counters.");

//t1 = get_seconds();
//printf("Time for block_prod = %f sec\n",t1-t0);
gettimeofday(&t1, NULL);
seconds = t1.tv_sec - t0.tv_sec;
useconds = t1.tv_usec - t0.tv_usec;
mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;
//printf("Time for matrix_prod = %f sec\n",t1-t0);
printf("%d\t%lld\t%lld\t%lld\t%ld\n", n, values[0], values[1],
    values[2], mtime);
}