Exemplo n.º 1
0
/**
 * Echelonize A in place using a PLE / PLUQ decomposition.
 *
 * @param A     Matrix to echelonize; it is overwritten with the result.
 * @param full  If non-zero, mzd_slice_pluq() is used, the columns to the
 *              right of the leading r x r block are multiplied by the inverse
 *              of U, and that block is then reset with mzd_slice_set_ui(U, 1)
 *              (presumably yielding the reduced row echelon form).
 *              If zero, mzd_slice_ple() is used and only the in-place L part
 *              of each pivot row is cleared.
 *
 * @return r, the value reported by the decomposition routine (the number of
 *         pivot rows). Rows r..nrows-1 of A are set to zero.
 */
rci_t mzd_slice_echelonize_ple(mzd_slice_t *A, int full) {
	mzp_t *P = mzp_init(A->nrows);
	mzp_t *Q = mzp_init(A->ncols);
	rci_t r;

	if(full) {
		r = mzd_slice_pluq(A, P, Q);

		mzd_slice_t *U = mzd_slice_init_window(A, 0, 0, r, r);
		/* r rounded down to a multiple of the word size */
		const rci_t r_radix = m4ri_radix*(r/m4ri_radix);

		if(r_radix == r && r!=A->ncols) {
			/* U ends on a word boundary: the remaining columns can be used
			 * directly through a window. */
			mzd_slice_t *B = mzd_slice_init_window(A, 0, r, r, A->ncols);
			for(rci_t i = 0; i < r; ++i)
				mzd_slice_write_elem(U, i, i, 1);
			mzd_slice_trsm_upper_left(U, B);
			mzd_slice_free_window(B);
		} else if (r_radix != r && r!=A->ncols) {
			assert(r_radix < r);

			/* The strip starting at column r_radix shares columns
			 * r_radix..r-1 with U, so it is solved on a copy (taken before
			 * the diagonal of U is overwritten) and copied back afterwards. */
			if(A->ncols > r_radix+m4ri_radix) {
				mzd_slice_t *B0  = mzd_slice_submatrix(NULL, A, 0, r_radix, r, r_radix+m4ri_radix);
				mzd_slice_t *B0w = mzd_slice_init_window(    A, 0, r_radix, r, r_radix+m4ri_radix);
				mzd_slice_t *B1  = mzd_slice_init_window(A, 0, r_radix+m4ri_radix, r, A->ncols);

				for(rci_t i = 0; i < r; ++i)
					mzd_slice_write_elem(U, i, i, 1);

				mzd_slice_trsm_upper_left(U, B0);
				mzd_slice_trsm_upper_left(U, B1);

				mzd_slice_copy(B0w, B0);
				mzd_slice_free(B0);
				mzd_slice_free_window(B0w);
				mzd_slice_free_window(B1);

			} else {
				/* Fewer than one full word of columns remains after
				 * r_radix: handle everything in a single copied strip. */
				mzd_slice_t *B = mzd_slice_submatrix(NULL, A, 0, r_radix, r, A->ncols);
				mzd_slice_t *Bw = mzd_slice_init_window(A, 0, r_radix, r, A->ncols);

				for(rci_t i = 0; i < r; ++i)
					mzd_slice_write_elem(U, i, i, 1);

				mzd_slice_trsm_upper_left(U, B);

				mzd_slice_copy(Bw, B);
				mzd_slice_free_window(Bw);
				mzd_slice_free(B);
			}
		}

		/* Overwrites the leading r x r block (and whatever the copy-back
		 * above left in its last columns). */
		mzd_slice_set_ui(U, 1);

		mzd_slice_free_window(U);

		if(r) {
			/* Apply the column permutation Q to the r pivot rows. */
			mzd_slice_t *A0 = mzd_slice_init_window(A, 0, 0, r, A->ncols);
			mzd_slice_apply_p_right(A0, Q);
			mzd_slice_free_window(A0);
		}

	} else {
		r = mzd_slice_ple(A, P, Q);

		for(rci_t i = 0; i < r; ++i) {
			for(int e=0; e < A->depth; e++) {
				/* Clear columns 0..i of row i in every slice. Each call
				 * clears up to m4ri_radix bits, so advance by a whole word
				 * instead of re-clearing the same bits column by column. */
				for(rci_t j = 0; j <= i; j += m4ri_radix) {
					int const length = MIN(m4ri_radix, i - j + 1);
					mzd_clear_bits(A->x[e], i, j, length);
				}
			}
			/* Put a 1 at the pivot column recorded in Q for this row. */
			mzd_slice_write_elem(A, i, Q->values[i], 1);
		}
	}

	if(r != A->nrows) {
		/* Zero all rows below the last pivot row. */
		mzd_slice_t *R = mzd_slice_init_window(A, r, 0, A->nrows, A->ncols);
		mzd_slice_set_ui(R, 0);
		mzd_slice_free_window(R);
	}

	mzp_free(P);
	mzp_free(Q);

	return r;
}
Exemplo n.º 2
0
/**
 * Benchmark a single elimination on an m x n matrix.
 *
 * @param _p        Pointer to a struct elim_params. The fields m, n, r and
 *                  algorithm are read; r is overwritten with the value
 *                  returned by the elimination routine.
 * @param data      Output array for the measurements. Without PAPI: data[0]
 *                  is the wall time and data[1] the CPU cycle count. With
 *                  PAPI: data[0] is the virtual time in microseconds and
 *                  data[1..] are the PAPI counters, each reduced by the
 *                  matching loop_calibration entry.
 * @param data_len  In/out: number of entries of data that are filled.
 *
 * @return 0 (unknown algorithms abort through m4ri_die()).
 *
 * If p->r is non-zero the input is built as A = L*U, with L unit lower
 * triangular and U upper triangular with a unit diagonal in its first r rows
 * and zero rows from row r on; otherwise A is random.
 * NOTE(review): assumes p->r <= min(p->m, p->n) -- confirm against callers.
 */
int run(void *_p, unsigned long long *data, int *data_len) {
  struct elim_params *p = (struct elim_params *)_p;
#ifndef HAVE_LIBPAPI
  *data_len = 2;
#else
  *data_len = MIN(papi_array_len + 1, *data_len);
#endif

  mzd_t *A = mzd_init(p->m, p->n);

  if(p->r != 0) {
    mzd_t *L, *U;
    L = mzd_init(p->m, p->m);
    U = mzd_init(p->m, p->n);
    mzd_randomize(U);
    mzd_randomize(L);
    for (rci_t i = 0; i < p->m; ++i) {

      /* L: clear everything right of the diagonal, set the diagonal. */
      for (rci_t j = i + 1; j < p->m; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, p->m - j);
        mzd_clear_bits(L, i, j, length);
      }
      mzd_write_bit(L,i,i, 1);

      /* U: clear everything left of the diagonal. U has only p->n columns,
       * so for rows with i > p->n the range is clamped to the row end. */
      for (rci_t j = 0; j < i && j < p->n; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, MIN(i, p->n) - j);
        mzd_clear_bits(U, i, j, length);
      }
      if(i < p->r) {
        mzd_write_bit(U, i, i, 1);
      } else {
        /* Rows from r on are zeroed completely. The chunk length must be
         * measured from the current column j (not from i), otherwise the
         * later chunks run past the end of the row. */
        for (rci_t j = i; j < p->n; j+=m4ri_radix) {
          int const length = MIN(m4ri_radix, p->n - j);
          mzd_clear_bits(U, i, j, length);
        }
      }
    }
    mzd_mul(A,L,U,0);
    mzd_free(L);
    mzd_free(U);
  } else {
    mzd_randomize(A);
  }

  mzp_t *P = mzp_init(A->nrows);
  mzp_t *Q = mzp_init(A->ncols);

#ifndef HAVE_LIBPAPI
  data[0] = walltime(0);
  data[1] = cpucycles();
#else
  int array_len = *data_len - 1;
  unsigned long long t0 = PAPI_get_virt_usec();
  int papi_res = PAPI_start_counters((int*)papi_events, array_len);
  if (papi_res)
    m4ri_die("");
#endif
  if(strcmp(p->algorithm, "m4ri") == 0)
    p->r = mzd_echelonize_m4ri(A, 0, 0);
  else if(strcmp(p->algorithm, "ple") == 0)
    p->r = mzd_ple(A, P, Q, 0);
  else if(strcmp(p->algorithm, "mmpf") == 0)
    p->r = _mzd_ple_russian(A, P, Q, 0);
  else
    m4ri_die("unknown algorithm %s",p->algorithm);
#ifndef HAVE_LIBPAPI
  data[1] = cpucycles() - data[1];
  data[0] = walltime(data[0]);
#else
  PAPI_stop_counters((long long*)&data[1], array_len);
  t0 = PAPI_get_virt_usec() - t0;
  data[0] = t0;
  for (int nv = 0; nv <= array_len; ++nv) {
    data[nv] -= loop_calibration[nv];
  }
#endif
  /* Release the permutations in every build configuration, and only after
   * the measurement has been taken so the frees are not part of it. */
  mzp_free(P);
  mzp_free(Q);
  mzd_free(A);
  return 0;
}
Exemplo n.º 3
0
int _mzd_pluq_solve_left(mzd_t const *A, rci_t rank, 
                         mzp_t const *P, mzp_t const *Q, 
                         mzd_t *B, int const cutoff, int const inconsistency_check) {
  /**
   * Solve A X = B in place, where A holds a PLUQ decomposition (L below
   * and U on/above the diagonal of the same matrix). B is overwritten in
   * four stages:
   *   1) apply P to B
   *   2) forward substitution with L
   *   3) backward substitution with U
   *   4) apply the transpose of Q to B
   *
   * Returns 0, or -1 when inconsistency_check is set and the rows of B
   * below the rank do not vanish after the L update.
   */

  int retval = 0;

  /* Stage 1: row permutation. */
  mzd_apply_p_left(B, P);

  /* Stage 2: the leading rank x rank block of A against the first rank
   * rows of B. */
  mzd_t const *lu_block = mzd_init_window_const(A, 0, 0, rank, rank);
  mzd_t *b_head = mzd_init_window(B, 0, 0, rank, B->ncols);
  mzd_trsm_lower_left(lu_block, b_head, cutoff);

  if (inconsistency_check) {
    /* Optional (skipping it is faster): update the rows rank..A->nrows of
     * B with the part of L below the leading block and test that nothing
     * is left over. */
    mzd_t const *l_below = mzd_init_window_const(A, rank, 0, A->nrows, rank);
    mzd_t *b_below = mzd_init_window(B, rank, 0, A->nrows, B->ncols);

    if (B->nrows > A->nrows) {
      /* Rows of B beyond the height of A are simply zeroed. */
      mzd_t *b_surplus = mzd_init_window(B, A->nrows, 0, B->nrows, B->ncols);
      mzd_set_ui(b_surplus, 0);
      mzd_free_window(b_surplus);
    }

    mzd_addmul(b_below, l_below, b_head, cutoff);

    /* A non-zero remainder is reported as -1. */
    if (mzd_is_zero(b_below)) {
      /* consistent: retval keeps its initial value */
    } else {
      retval = -1;
    }

    mzd_free_window((mzd_t*)l_below);
    mzd_free_window(b_below);
  }

  /* Stage 3: backward substitution on the same leading block. */
  mzd_trsm_upper_left(lu_block, b_head, cutoff);
  mzd_free_window((mzd_t*)lu_block);
  mzd_free_window(b_head);

  if (!inconsistency_check) {
    /* Without the check the rows from rank on were never touched, so
     * clear them here, one word-sized chunk at a time. */
    for (rci_t row = rank; row < B->nrows; ++row) {
      for (rci_t col = 0; col < B->ncols; col += m4ri_radix) {
        int const width = MIN(m4ri_radix, B->ncols - col);
        mzd_clear_bits(B, row, col, width);
      }
    }
  }

  /* Stage 4: undo the column permutation. */
  mzd_apply_p_left_trans(B, Q);

  __M4RI_DD_MZD(B); 
  __M4RI_DD_INT(retval);
  return retval;
}
Exemplo n.º 4
0
/**
 * Calibration run: builds the same input matrix as a real benchmark run but
 * performs no elimination between starting and stopping the measurement.
 *
 * @param _p        Pointer to a struct elim_params; the fields m, n and r
 *                  are read.
 * @param data      Output array for the measurements. Without PAPI: data[0]
 *                  is the wall time and data[1] the CPU cycle count. With
 *                  PAPI: data[0] is the virtual time in microseconds and
 *                  data[1..] are the PAPI counters.
 * @param data_len  In/out: number of entries of data that are filled.
 *
 * @return 0.
 *
 * With PAPI, loop_calibration[] is lowered to the measured values whenever
 * they are smaller than the stored ones.
 * NOTE(review): assumes p->r <= min(p->m, p->n) -- confirm against callers.
 */
int run_nothing(void *_p, unsigned long long *data, int *data_len) {
  struct elim_params *p = (struct elim_params *)_p;

  mzd_t *A = mzd_init(p->m, p->n);

  if(p->r != 0) {
    mzd_t *L, *U;
    L = mzd_init(p->m, p->m);
    U = mzd_init(p->m, p->n);
    mzd_randomize(U);
    mzd_randomize(L);
    for (rci_t i = 0; i < p->m; ++i) {

      /* L: clear everything right of the diagonal, set the diagonal. */
      for (rci_t j = i + 1; j < p->m; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, p->m - j);
        mzd_clear_bits(L, i, j, length);
      }
      mzd_write_bit(L,i,i, 1);

      /* U: clear everything left of the diagonal. U has only p->n columns,
       * so for rows with i > p->n the range is clamped to the row end. */
      for (rci_t j = 0; j < i && j <p->n; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, MIN(i, p->n) - j);
        mzd_clear_bits(U, i, j, length);
      }
      if(i < p->r) {
        mzd_write_bit(U, i, i, 1);
      } else {
        /* Rows from r on are zeroed completely. */
        for (rci_t j = i; j < p->n; j+=m4ri_radix) {
          int const length = MIN(m4ri_radix, p->n - j);
          mzd_clear_bits(U, i, j, length);
        }
      }
    }
    mzd_mul(A,L,U,0);
    mzd_free(L);
    mzd_free(U);
  } else {
    mzd_randomize(A);
  }

#ifndef HAVE_LIBPAPI
  *data_len = 2;
#else
  *data_len = MIN(papi_array_len + 1, *data_len);
#endif

#ifndef HAVE_LIBPAPI
  data[0] = walltime(0);
  data[1] = cpucycles();
#else
  int array_len = *data_len - 1;
  unsigned long long t0 = PAPI_get_virt_usec();
  /* Declared here so non-PAPI builds do not carry an unused variable. */
  int papi_res = PAPI_start_counters((int*)papi_events, array_len);
  if(papi_res)
    m4ri_die("");
#endif

  /* Intentionally empty: only the measurement overhead is recorded. */

#ifndef HAVE_LIBPAPI
  data[1] = cpucycles() - data[1];
  data[0] = walltime(data[0]);
#else
  PAPI_stop_counters((long long*)&data[1], array_len);
  t0 = PAPI_get_virt_usec() - t0;
  data[0] = t0;
  /* Keep the smallest overhead seen so far for each counter. */
  for (int nv = 0; nv <= array_len; ++nv) {
    if (data[nv] < loop_calibration[nv])
      loop_calibration[nv] = data[nv];
  }
#endif

  mzd_free(A);

  return (0);
}