/**
 * Dimension-checked entry point that forwards to _mzd_pluq_solve_left().
 *
 * Before forwarding, three conditions are verified and reported through
 * m4ri_die() when violated:
 *   - A->ncols must not exceed B->nrows,
 *   - P->length must equal A->nrows,
 *   - Q->length must equal A->ncols.
 *
 * All arguments are passed through unchanged; presumably A holds a PLUQ
 * factorisation with permutations P and Q and B is the right-hand side
 * (TODO confirm against the header documentation).
 *
 * \return Whatever _mzd_pluq_solve_left() returns.
 */
int mzd_pluq_solve_left (mzd_t const *A, rci_t rank, mzp_t const *P, mzp_t const *Q, mzd_t *B, int const cutoff, int const inconsistency_check) {
  if (A->ncols > B->nrows) {
    m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to be lower than B nrows (%d).\n", A->ncols, B->nrows);
  }
  if (P->length != A->nrows) {
    m4ri_die("mzd_pluq_solve_left: A nrows (%d) need to match P size (%d).\n", A->nrows, P->length);
  }
  if (Q->length != A->ncols) {
    /* Report the size of Q (the permutation actually being checked), not P. */
    m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to match Q size (%d).\n", A->ncols, Q->length);
  }

  return _mzd_pluq_solve_left(A, rank, P, Q, B, cutoff, inconsistency_check);
}
int main(int argc, char **argv) { global_options(&argc, &argv); if (argc < 5) { print_help(); m4ri_die(""); } struct smallops_params params; params.k = atoi(argv[1]); params.m = atoi(argv[2]); params.n = atoi(argv[3]); srandom(17); unsigned long long data[2]; if(strcmp(argv[4],"mzed_slice") == 0) { run_bench(run_mzed_slice, (void*)¶ms, data, 2); } else if(strcmp(argv[4],"mzed_cling") == 0) { run_bench(run_mzed_cling, (void*)¶ms, data, 2); } else if(strcmp(argv[4],"mzed_add") == 0) { run_bench(run_mzed_add, (void*)¶ms, data, 2); } double cc_per_op = ((double)data[1])/ ( (double)params.m * (double)params.n ); printf("%s: m: %5d, n: %5d, cpu cycles: %10llu, cc/(mn): %.5lf, wall time: %lf\n", argv[4], params.m, params.n, data[1], cc_per_op, data[0] / 1000000.0); }
/**
 * Write the usage text for this benchmark to stdout, let
 * bench_print_global_options() append its text to stderr, and finish by
 * calling m4ri_die() with an empty message.
 */
void print_help_and_exit() {
  /* One entry per output line, emitted verbatim in order. */
  static const char *const usage_lines[] = {
    "Parameters m(, n, alg, r) expected.\n",
    " m -- integer > 0\n",
    " n -- integer > 0\n",
    " alg -- 'm4ri', 'ple', or 'mmpf' (default: 'ple')\n",
    " r -- target rank >= 0, if 0 then mzd_randomize() is called (default: MIN(m,n))\n",
    "\n",
  };
  size_t const line_count = sizeof usage_lines / sizeof usage_lines[0];

  for (size_t idx = 0; idx < line_count; ++idx) {
    fputs(usage_lines[idx], stdout);
  }

  bench_print_global_options(stderr);
  m4ri_die("");
}
int main(int argc, char **argv) { global_options(&argc, &argv); if (argc != 3) { m4ri_die("Parameters m, n expected.\n"); } struct trsm_params p; p.m = atoi(argv[1]); p.n = atoi(argv[2]); srandom(17); unsigned long long data[2]; run_bench(run, (void*)&p, data, 2); printf("m: %5d, n: %5d, cpu cycles: %llu wall time: %lf\n", p.m, p.n, data[1], data[0] / 1000000.0); }
/**
 * Checked front end for _mzd_solve_left().
 *
 * Reports through m4ri_die() when A has more columns than B has rows, then
 * forwards every argument unchanged to _mzd_solve_left().
 *
 * \return The value returned by _mzd_solve_left().
 */
int mzd_solve_left(mzd_t *A, mzd_t *B, int const cutoff, int const inconsistency_check) {
  int const dimension_mismatch = (A->ncols > B->nrows);

  if (dimension_mismatch) {
    m4ri_die("mzd_solve_left: A ncols (%d) must be smaller than B nrows (%d).\n", A->ncols, B->nrows);
  }

  int const status = _mzd_solve_left(A, B, cutoff, inconsistency_check);
  return status;
}
int run(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; mzd_t *A = mzd_init(p->m, p->n); if(p->r != 0) { mzd_t *L, *U; L = mzd_init(p->m, p->m); U = mzd_init(p->m, p->n); mzd_randomize(U); mzd_randomize(L); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j < p->m; j+=m4ri_radix) { int const length = MIN(m4ri_radix, p->m - j); mzd_clear_bits(L, i, j, length); } mzd_write_bit(L,i,i, 1); for (rci_t j = 0; j < i && j < p->n; j+=m4ri_radix) { int const length = MIN(m4ri_radix, i - j); mzd_clear_bits(U, i, j, length); } if(i < p->r) { mzd_write_bit(U, i, i, 1); } else { for (rci_t j = i; j < p->n; j+=m4ri_radix) { int const length = MIN(m4ri_radix, p->n - i); mzd_clear_bits(U, i, j, length); } } } mzd_mul(A,L,U,0); mzd_free(L); mzd_free(U); } else { mzd_randomize(A); } mzp_t *P = mzp_init(A->nrows); mzp_t *Q = mzp_init(A->ncols); #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int*)papi_events, array_len); if (papi_res) m4ri_die(""); #endif if(strcmp(p->algorithm, "m4ri") == 0) p->r = mzd_echelonize_m4ri(A, 0, 0); else if(strcmp(p->algorithm, "ple") == 0) p->r = mzd_ple(A, P, Q, 0); else if(strcmp(p->algorithm, "mmpf") == 0) p->r = _mzd_ple_russian(A, P, Q, 0); else m4ri_die("unknown algorithm %s",p->algorithm); #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else mzp_free(P); mzp_free(Q); PAPI_stop_counters((long long*)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { data[nv] -= loop_calibration[nv]; } #endif mzd_free(A); return 0; }
int run_nothing(void *_p, unsigned long long *data, int *data_len) { struct elim_params *p = (struct elim_params *)_p; mzd_t *A = mzd_init(p->m, p->n); if(p->r != 0) { mzd_t *L, *U; L = mzd_init(p->m, p->m); U = mzd_init(p->m, p->n); mzd_randomize(U); mzd_randomize(L); for (rci_t i = 0; i < p->m; ++i) { for (rci_t j = i + 1; j < p->m; j+=m4ri_radix) { int const length = MIN(m4ri_radix, p->m - j); mzd_clear_bits(L, i, j, length); } mzd_write_bit(L,i,i, 1); for (rci_t j = 0; j < i && j <p->n; j+=m4ri_radix) { int const length = MIN(m4ri_radix, i - j); mzd_clear_bits(U, i, j, length); } if(i < p->r) { mzd_write_bit(U, i, i, 1); } else { for (rci_t j = i; j < p->n; j+=m4ri_radix) { int const length = MIN(m4ri_radix, p->n - j); mzd_clear_bits(U, i, j, length); } } } mzd_mul(A,L,U,0); mzd_free(L); mzd_free(U); } else { mzd_randomize(A); } #ifndef HAVE_LIBPAPI *data_len = 2; #else *data_len = MIN(papi_array_len + 1, *data_len); #endif int papi_res; #ifndef HAVE_LIBPAPI data[0] = walltime(0); data[1] = cpucycles(); #else int array_len = *data_len - 1; unsigned long long t0 = PAPI_get_virt_usec(); papi_res = PAPI_start_counters((int*)papi_events, array_len); if(papi_res) m4ri_die(""); #endif #ifndef HAVE_LIBPAPI data[1] = cpucycles() - data[1]; data[0] = walltime(data[0]); #else PAPI_stop_counters((long long*)&data[1], array_len); t0 = PAPI_get_virt_usec() - t0; data[0] = t0; for (int nv = 0; nv <= array_len; ++nv) { if (data[nv] < loop_calibration[nv]) loop_calibration[nv] = data[nv]; } #endif mzd_free(A); return (0); }