mzd_t *mzd_kernel_left_pluq(mzd_t *A, int const cutoff) {
  mzp_t *P = mzp_init(A->nrows);
  mzp_t *Q = mzp_init(A->ncols);

  rci_t r = mzd_pluq(A, P, Q, cutoff);

  if (r == A->ncols) {
    mzp_free(P);
    mzp_free(Q);
    __M4RI_DD_MZD(A);
    return NULL;
  }

  mzd_t *U = mzd_init_window(A, 0, 0, r, r);
  mzd_t *B = mzd_init_window(A, 0, r, r, A->ncols);

  mzd_trsm_upper_left(U, B, cutoff);

  mzd_t *R = mzd_init(A->ncols, A->ncols - r);
  mzd_t *RU = mzd_init_window(R, 0, 0, r, R->ncols);
  mzd_copy(RU, B);
  for(rci_t i = 0; i < R->ncols; ++i) {
    mzd_write_bit(R, r + i, i, 1);
  }
  mzd_apply_p_left_trans(R, Q);
  mzp_free(P);
  mzp_free(Q);
  mzd_free_window(RU);
  mzd_free_window(U);
  mzd_free_window(B);

  __M4RI_DD_MZD(A);
  __M4RI_DD_MZD(R);
  return R;
}
int _mzd_solve_left(mzd_t *A, mzd_t *B, int const cutoff, int const inconsistency_check) {
  /**
   *  B is modified in place 
   *  (Bi's in the comments are just modified versions of B)
   *  1) PLUQ = A
   *  2) P B2 = B1
   *  3) L B3 = B2
   *  4) U B4 = B3
   *  5) Q B5 = B4
   */
  mzp_t *P = mzp_init(A->nrows);
  mzp_t *Q = mzp_init(A->ncols);
  
  /* PLUQ = A */
  rci_t rank = _mzd_pluq(A, P, Q, cutoff);
  /* 2, 3, 4, 5 */
  int retval = mzd_pluq_solve_left(A, rank, P, Q, B, cutoff, inconsistency_check);
  
  mzp_free(P);
  mzp_free(Q);

  __M4RI_DD_MZD(A);
  __M4RI_DD_MZD(B);
  return retval;
}
int test_pluq_structured(rci_t m, rci_t n) {

  printf("pluq: testing structured m: %5d, n: %5d", m, n);

  mzd_t* A = mzd_init(m, n);
  mzd_t* L = mzd_init(m, m);
  mzd_t* U = mzd_init(m, n);

  for(rci_t i = 0; i < m; i += 2)
    for (rci_t j = i; j < n; ++j)
      mzd_write_bit(A, i, j, 1);

  mzd_t* Acopy = mzd_copy (NULL,A);

  mzp_t* P = mzp_init(m);
  mzp_t* Q = mzp_init(n);
  rci_t r = mzd_pluq(A, P, Q, 0);
  printf(", rank: %5d ",r);

  for (rci_t i = 0; i < r; ++i){
    for (rci_t j = 0; j < i; ++j)
      mzd_write_bit(L, i, j, mzd_read_bit(A,i,j));
    for (rci_t j = i + 1; j < n; ++j)
      mzd_write_bit(U, i, j, mzd_read_bit(A,i,j));
  }
  for (rci_t i = r; i < m; ++i)
    for (rci_t j = 0; j < r; ++j)
      mzd_write_bit(L, i, j, mzd_read_bit(A,i,j));
  for (rci_t i = 0; i < r; ++i){
    mzd_write_bit(L,i,i, 1);
    mzd_write_bit(U,i,i, 1);
  }

  mzd_apply_p_left(Acopy, P);
  mzd_apply_p_right_trans(Acopy, Q);

  mzd_addmul(Acopy, L, U, 0);
  int status = 0;
  for (rci_t i = 0; i < m; ++i)
    for (rci_t j = 0; j < n; ++j){
      if (mzd_read_bit (Acopy,i,j)){
	status = 1;
        break;
      }
    }

  if (status) {
    printf("\n");
    printf(" ... FAILED\n");
  }  else
    printf (" ... passed\n");
  mzd_free(U);
  mzd_free(L);
  mzd_free(A);
  mzd_free(Acopy);
  mzp_free(P);
  mzp_free(Q);
  return status;
}
Beispiel #4
0
rci_t mzd_slice_echelonize_ple(mzd_slice_t *A, int full) {
	mzp_t *P = mzp_init(A->nrows);
	mzp_t *Q = mzp_init(A->ncols);
	rci_t r;

	if(full) {
		r = mzd_slice_pluq(A, P, Q);

		mzd_slice_t *U = mzd_slice_init_window(A, 0, 0, r, r);
		const rci_t r_radix = m4ri_radix*(r/m4ri_radix);

		if(r_radix == r && r!=A->ncols) {
			mzd_slice_t *B = mzd_slice_init_window(A, 0, r, r, A->ncols);
			for(rci_t i = 0; i < r; ++i)
				mzd_slice_write_elem(U, i, i, 1);
			mzd_slice_trsm_upper_left(U, B);
			mzd_slice_free_window(B);
		} else if (r_radix != r && r!=A->ncols) {
			assert(r_radix < r);

			if(A->ncols > r_radix+m4ri_radix) {
				mzd_slice_t *B0  = mzd_slice_submatrix(NULL, A, 0, r_radix, r, r_radix+m4ri_radix);
				mzd_slice_t *B0w = mzd_slice_init_window(    A, 0, r_radix, r, r_radix+m4ri_radix);
				mzd_slice_t *B1  = mzd_slice_init_window(A, 0, r_radix+m4ri_radix, r, A->ncols);

				for(rci_t i = 0; i < r; ++i)
					mzd_slice_write_elem(U, i, i, 1);

				mzd_slice_trsm_upper_left(U, B0);
				mzd_slice_trsm_upper_left(U, B1);

				mzd_slice_copy(B0w, B0);
				mzd_slice_free(B0);
				mzd_slice_free_window(B0w);
				mzd_slice_free_window(B1);

			} else {
				mzd_slice_t *B = mzd_slice_submatrix(NULL, A, 0, r_radix, r, A->ncols);
				mzd_slice_t *Bw = mzd_slice_init_window(A, 0, r_radix, r, A->ncols);

				for(rci_t i = 0; i < r; ++i)
					mzd_slice_write_elem(U, i, i, 1);

				mzd_slice_trsm_upper_left(U, B);

				mzd_slice_copy(Bw, B);
				mzd_slice_free_window(Bw);
				mzd_slice_free(B);     
			}
		}

		mzd_slice_set_ui(U, 1);

		mzd_slice_free_window(U);

		if(r) {
			mzd_slice_t *A0 = mzd_slice_init_window(A, 0, 0, r, A->ncols);
			mzd_slice_apply_p_right(A0, Q);
			mzd_slice_free_window(A0);
		}

	} else {
		r = mzd_slice_ple(A, P, Q);

		for(rci_t i = 0; i < r; ++i) {
			for(int e=0; e < A->depth; e++) {
				for(rci_t j = 0; j <= i; j++) {
					int const length = MIN(m4ri_radix, i - j + 1);
					mzd_clear_bits(A->x[e], i, j, length);
				}
			}
			mzd_slice_write_elem(A, i, Q->values[i], 1);
		}
	}

	if(r != A->nrows) {
		mzd_slice_t *R = mzd_slice_init_window(A, r, 0, A->nrows, A->ncols);
		mzd_slice_set_ui(R, 0);
		mzd_slice_free_window(R);
	}

	mzp_free(P);
	mzp_free(Q);

	return r;
}
Beispiel #5
0
int run(void *_p, unsigned long long *data, int *data_len) {
  struct elim_params *p = (struct elim_params *)_p;
#ifndef HAVE_LIBPAPI
  *data_len = 2;
#else
  *data_len = MIN(papi_array_len + 1, *data_len);
#endif
  int papi_res;

  mzd_t *A = mzd_init(p->m, p->n);

  if(p->r != 0) {
    mzd_t *L, *U;
    L = mzd_init(p->m, p->m);
    U = mzd_init(p->m, p->n);
    mzd_randomize(U);
    mzd_randomize(L);
    for (rci_t i = 0; i < p->m; ++i) {

      for (rci_t j = i + 1; j < p->m; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, p->m - j);
        mzd_clear_bits(L, i, j, length);
      }
      mzd_write_bit(L,i,i, 1);

      for (rci_t j = 0; j < i && j < p->n; j+=m4ri_radix) {
        int const length = MIN(m4ri_radix, i - j);
        mzd_clear_bits(U, i, j, length);
      }
      if(i < p->r) {
        mzd_write_bit(U, i, i, 1);
      } else {
        for (rci_t j = i; j < p->n; j+=m4ri_radix) {
          int const length = MIN(m4ri_radix, p->n - i);
          mzd_clear_bits(U, i, j, length);
        }
      }
    }
    mzd_mul(A,L,U,0);
    mzd_free(L);
    mzd_free(U);
  } else {
    mzd_randomize(A);
  }

  mzp_t *P = mzp_init(A->nrows);
  mzp_t *Q = mzp_init(A->ncols);

#ifndef HAVE_LIBPAPI
  data[0] = walltime(0);
  data[1] = cpucycles();
#else
  int array_len = *data_len - 1;
  unsigned long long t0 = PAPI_get_virt_usec();
  papi_res = PAPI_start_counters((int*)papi_events, array_len);
  if (papi_res)
    m4ri_die("");
#endif
  if(strcmp(p->algorithm, "m4ri") == 0)
    p->r = mzd_echelonize_m4ri(A, 0, 0);
  else if(strcmp(p->algorithm, "ple") == 0)
    p->r = mzd_ple(A, P, Q, 0);
  else if(strcmp(p->algorithm, "mmpf") == 0)
    p->r = _mzd_ple_russian(A, P, Q, 0);
  else
    m4ri_die("unknown algorithm %s",p->algorithm);
#ifndef HAVE_LIBPAPI
  data[1] = cpucycles() - data[1];
  data[0] = walltime(data[0]);
#else
  mzp_free(P);
  mzp_free(Q);

  PAPI_stop_counters((long long*)&data[1], array_len);
  t0 = PAPI_get_virt_usec() - t0;
  data[0] = t0;
  for (int nv = 0; nv <= array_len; ++nv) {
    data[nv] -= loop_calibration[nv];
  }
#endif
  mzd_free(A);
  return 0;
}
Beispiel #6
0
int test_lqup_half_rank(size_t m, size_t n) {
  printf("pluq: testing half rank m: %5zd, n: %5zd",m,n);

  mzd_t* U = mzd_init(m, n);
  mzd_t* L = mzd_init(m, m);
  mzd_t* U2 = mzd_init(m, n);
  mzd_t* L2 = mzd_init(m, m);
  mzd_t* A = mzd_init(m, n);
  mzd_randomize (U);
  mzd_randomize (L);

  size_t i,j;
  for (i=0; i<m && i<n; ++i){
    mzd_write_bit(U,i,i, 1);
    for (j=0; j<i;++j)
      mzd_write_bit(U,i,j, 0);
    if (i%2)
      for (j=i; j<n;++j)
	mzd_write_bit(U,i,j, 0);
    for (j=i+1; j<m;++j)
      mzd_write_bit(L,i,j, 0);
    mzd_write_bit(L,i,i, 1);
  }
  
  mzd_mul(A, L, U, 0);

  mzd_t* Acopy = mzd_copy (NULL,A);



  mzp_t* Pt = mzp_init(m);
  mzp_t* Q = mzp_init(n);
  int r = mzd_pluq(A, Pt, Q, 0);

  for (i=0; i<r; ++i){
    for (j=0; j<i;++j)
      mzd_write_bit (L2, i, j, mzd_read_bit(A,i,j));
    for (j=i+1; j<n;++j)
      mzd_write_bit (U2, i, j, mzd_read_bit(A,i,j));
  }
  for (i=r; i<m; i++)
    for (j=0; j<r;++j)
      mzd_write_bit (L2, i, j, mzd_read_bit(A,i,j));
  for (i=0; i<r; ++i){
    mzd_write_bit(L2,i,i, 1);
    mzd_write_bit(U2,i,i, 1);
  }

  mzd_apply_p_left(Acopy, Pt);
  mzd_apply_p_right_trans(Acopy, Q);
  mzd_addmul(Acopy,L2,U2,0);

  int status = 0;
  for ( i=0; i<m; ++i) {
    for ( j=0; j<n; ++j){
      if (mzd_read_bit(Acopy,i,j)){
	status = 1;
      }
    }
    if(status)
      break;
  }
  if (status)
    printf(" ... FAILED\n");
  else
    printf (" ... passed\n");
  mzd_free(U);
  mzd_free(L);
  mzd_free(U2);
  mzd_free(L2);
  mzd_free(A);
  mzd_free(Acopy);
  mzp_free(Pt);
  mzp_free(Q);
  return status;
}
Beispiel #7
0
int test_lqup_full_rank (size_t m, size_t n){
  printf("pluq: testing full rank m: %5zu, n: %5zu",m,n);

  mzd_t* U = mzd_init (m,n);
  mzd_t* L = mzd_init (m,m);
  mzd_t* U2 = mzd_init (m,n);
  mzd_t* L2 = mzd_init (m,m);
  mzd_t* A = mzd_init (m,n);
  mzd_randomize (U);
  mzd_randomize (L);

  size_t i,j;
  for (i=0; i<m; ++i){
    for (j=0; j<i && j<n;++j)
      mzd_write_bit(U,i,j, 0);
    for (j=i+1; j<m;++j)
      mzd_write_bit(L,i,j, 0);
    if(i<n)
      mzd_write_bit(U,i,i, 1);
    mzd_write_bit(L,i,i, 1);
  }
  
  mzd_mul(A, L, U, 2048);

  mzd_t* Acopy = mzd_copy (NULL,A);

  mzp_t* P = mzp_init(m);
  mzp_t* Q = mzp_init(n);
  mzd_pluq(A, P, Q, 2048);

  for (i=0; i<m; ++i){
    for (j=0; j<i && j <n;++j)
      mzd_write_bit (L2, i, j, mzd_read_bit(A,i,j));
    for (j=i+1; j<n;++j)
      mzd_write_bit (U2, i, j, mzd_read_bit(A,i,j));
  }
  
  for (i=0; i<n && i<m; ++i){
    mzd_write_bit(L2,i,i, 1);
    mzd_write_bit(U2,i,i, 1);
  }
  mzd_addmul(Acopy,L2,U2,0);
  int status = 0;
  for ( i=0; i<m; ++i)
    for ( j=0; j<n; ++j){
      if (mzd_read_bit (Acopy,i,j)){
	status = 1;
      }
    }
  if (status){
    printf(" ... FAILED\n");
  }  else
    printf (" ... passed\n");
  mzd_free(U);
  mzd_free(L);
  mzd_free(U2);
  mzd_free(L2);
  mzd_free(A);
  mzd_free(Acopy);
  mzp_free(P);
  mzp_free(Q);
  return status;
}