コード例 #1
0
ファイル: mat-elim-omp-task.c プロジェクト: ederc/pLA
/*
 * D1 - recursive, task-based traversal of an index cube (k, i, j) over M.
 *
 * Returns immediately when i2 <= k1 or j2 <= k1.  When `size` is at most
 * `blocksize` the work is delegated to base_case().  Otherwise `size` is
 * halved, the three index ranges k1..k2, i1..i2 and j1..j2 are split at their
 * integer midpoints, and the four (i, j) quadrants are processed as OpenMP
 * tasks: first all four with the lower k half, then, after a taskwait, all
 * four with the upper k half.
 *
 * `thrds` is not used here other than being forwarded to the recursive calls.
 * The tasks can only run concurrently when the caller is inside an active
 * OpenMP parallel region.
 */
void D1(unsigned int *M, const unsigned int k1, const unsigned int k2,
        const unsigned int i1, const unsigned int i2,
        const unsigned int j1, const unsigned int j2,
        const unsigned int rows, const unsigned int cols,
        unsigned int size, unsigned int prime,
        unsigned int *neg_inv_piv, unsigned int blocksize, int thrds) {
  /* Proceed only if both upper bounds lie strictly beyond k1. */
  if (!(i2 > k1 && j2 > k1))
    return;

  /* Small enough: hand the block to the sequential kernel. */
  if (size <= blocksize) {
    base_case(M, k1, i1, j1, rows, cols, size, prime, neg_inv_piv);
    return;
  }

  /* Deliberately non-const locals so that they are captured by value
   * (firstprivate) in the tasks below. */
  unsigned int half  = size / 2;
  unsigned int k_mid = (k1 + k2) / 2;
  unsigned int i_mid = (i1 + i2) / 2;
  unsigned int j_mid = (j1 + j2) / 2;

  /* ---- first round: lower k half, four quadrants as tasks ---- */
  /* X11 */
#pragma omp task mergeable
  D1(M, k1, k_mid, i1, i_mid, j1, j_mid,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* X12 */
#pragma omp task mergeable
  D1(M, k1, k_mid, i1, i_mid, j_mid + 1, j2,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* X21 */
#pragma omp task mergeable
  D1(M, k1, k_mid, i_mid + 1, i2, j1, j_mid,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* X22 */
#pragma omp task mergeable
  D1(M, k1, k_mid, i_mid + 1, i2, j_mid + 1, j2,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* All four quadrants must be finished before the upper k half starts. */
#pragma omp taskwait

  /* ---- second round: upper k half, four quadrants as tasks ---- */
  /* X11 */
#pragma omp task mergeable
  D1(M, k_mid + 1, k2, i1, i_mid, j1, j_mid,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* X12 */
#pragma omp task mergeable
  D1(M, k_mid + 1, k2, i1, i_mid, j_mid + 1, j2,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* X21 */
#pragma omp task mergeable
  D1(M, k_mid + 1, k2, i_mid + 1, i2, j1, j_mid,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* X22 */
#pragma omp task mergeable
  D1(M, k_mid + 1, k2, i_mid + 1, i2, j_mid + 1, j2,
     rows, cols, half, prime, neg_inv_piv, blocksize, thrds);
  /* Do not return while child tasks may still be running. */
#pragma omp taskwait
}
コード例 #2
0
    /**
     * Builds the structure from the text obtained via parse_text(text_file),
     * processing it in blocks of at most `_block_size` characters.
     *
     * The first block is handled by base_case(); every further block goes
     * through compute_rank_long(), compute_rank_short() and compute_psi(),
     * after which the old/new psi objects and the old/new blocks are rotated.
     * Once all blocks are consumed, sample_sa() and sample_isa() are called.
     * One progress line per additional block is written to std::cout.
     *
     * @param text_file  forwarded unchanged to parse_text().
     */
    void construct(std::string text_file) {
        _text = parse_text(text_file);

        const int64_t text_len = _text.size();
        int64_t chars_left = text_len;   // characters not yet assigned to a block
        int64_t extra_blocks = 0;        // number of blocks after the first one
        int64_t first = 0;               // index of the first character of the current block
        int64_t last = 0;                // index of the last character of the current block

        _size = 0;
        // NOTE(review): who releases _wm is not visible in this method.
        _wm = new int64_t[(_block_size + 1) * 4];

        if (text_len <= _block_size) {
            // The whole text fits into the first block.
            _current_block_size = text_len;
            chars_left = 0;
        } else {
            _current_block_size = _block_size;
            chars_left -= _current_block_size;
            extra_blocks = std::ceil(static_cast<double>(chars_left) / _block_size);
            last = std::min(first + _current_block_size - 1, text_len);
        }

        // First block.
        _old_block = _text.substr(0, _current_block_size);
        _size += _current_block_size;
        base_case();
        first = last + 1;

        // Remaining blocks, one per pass.
        for (int64_t iter = 1; iter <= extra_blocks; ++iter) {
            std::cout << "Iteration " << iter << " of " << extra_blocks << "\n";

            last = std::min(first + _block_size - 1, text_len - 1);
            const int64_t span = last - first + 1;
            _new_block = _text.substr(first, span);
            _current_block_size = std::min(span, chars_left);
            _size += _current_block_size; // update csa size

            compute_rank_long();
            compute_rank_short();
            compute_psi();

            // Advance the window and rotate new -> old for the next pass.
            first = last + 1;
            chars_left -= _current_block_size;
            _old_psi.swap(_new_psi);
            _new_psi.reset();
            _old_block = std::move(_new_block);
        }

        // The loop leaves the latest result in _old_psi; hand it to _new_psi.
        _new_psi = std::move(_old_psi);
        sample_sa();
        sample_isa();
    }
コード例 #3
0
ファイル: mat-elim-omp-task.c プロジェクト: ederc/pLA
/*
 * A - recursive driver over the index ranges k1..k2, i1..i2 and j1..j2 of M.
 *
 * Returns immediately when i2 <= k1 or j2 <= k1.  When `size` is at most
 * `blocksize` the work is delegated to base_case().  Otherwise `size` is
 * halved, the ranges are split at their integer midpoints and the steps are
 * run in this order:
 *   1. A  on the upper-left quadrant, lower k half   (forward step)
 *   2. B1 on the upper-right and C1 on the lower-left quadrant, as two tasks
 *   3. D1 on the lower-right quadrant, lower k half
 *   4. A  on the lower-right quadrant, upper k half  (backward step)
 *
 * Steps 2 and 3 run inside a parallel region of `thrds` threads.  Exactly one
 * thread (omp single) creates the tasks and calls D1; the remaining threads of
 * the team are available to execute the tasks created here and further down.
 * Without the `single`, every thread of the team would execute steps 2-4
 * itself, repeating the work and racing on M.
 *
 * Step 4 is issued after the parallel region has ended, like step 1 is issued
 * before it, so that the recursion does not open a parallel region from inside
 * another one.
 */
void A( unsigned int *M, const unsigned int k1, const unsigned int k2,
        const unsigned int i1, const unsigned int i2,
        const unsigned int j1, const unsigned int j2,
        const unsigned int rows, const unsigned int cols,
        unsigned int size, unsigned int prime,
        unsigned int *neg_inv_piv, unsigned int blocksize, int thrds) {
  if (i2 <= k1 || j2 <= k1)
    return;

  if (size <= blocksize) {
    base_case (M, k1, i1, j1, rows, cols, size, prime, neg_inv_piv);
  } else {
    size = size / 2;

    unsigned int km = (k1+k2) / 2;
    unsigned int im = (i1+i2) / 2;
    unsigned int jm = (j1+j2) / 2;

    // forward step
    A(M, k1, km, i1, im, j1, jm, rows, cols, size,
      prime, neg_inv_piv, blocksize, thrds);

    // parallel - start
#pragma omp parallel num_threads(thrds)
    {
      // One thread generates the work; the others pick up tasks while they
      // wait at the implicit barrier that ends the single construct.
#pragma omp single
      {
#pragma omp task mergeable
        B1( M, k1, km, i1, im, jm+1, j2, rows, cols, size,
            prime, neg_inv_piv, blocksize, thrds);
#pragma omp task mergeable
        C1( M, k1, km, im+1, i2, j1, jm, rows, cols, size,
            prime, neg_inv_piv, blocksize, thrds);
        // B1 and C1 have to be finished before D1 starts.
#pragma omp taskwait
        D1( M, k1, km, im+1, i2, jm+1, j2, rows, cols, size,
            prime, neg_inv_piv, blocksize, thrds);
      }
    }
    // parallel - end

    // backward step
    A(M, km+1, k2, im+1, i2, jm+1, j2, rows, cols, size,
      prime, neg_inv_piv, blocksize, thrds);
  }
}
コード例 #4
0
ファイル: arrayoperation.cpp プロジェクト: atomopawn/FDDL
// Applies this operation to the `size` operand nodes p[0..size-1] at level k
// and returns the index of the resulting node at level k.
//
//  - Throws OperationException when k is above the forest's top level.
//  - At level 0 the result comes from base_case(p, size).
//  - Otherwise the per-level cache is consulted first; on a miss a new node is
//    created, its arcs are filled in by recursive calls at level k-1, the node
//    is checked in and the result is stored in the cache.
node_idx ArrayOperation::apply(const level k, const node_idx p[], const unsigned int size)
{
	node_idx r = 0;
	if (k > m_forest->top_level())
		throw OperationException("Level out of range.\n");

	if (k == 0) {
		return base_case(p, size);
	}

	// A non-negative value is treated as a cache hit (presumably hit()
	// reports a miss with a negative value - confirm against the cache).
	r = m_cache[k]->hit(k, p, size);
	if (r >= 0) {
		//If the node has been deleted, restore it before returning.
		if (m_forest->FDDL_NODE(k, r).deleted())
			r = m_forest->CheckIn(k, r);
		return r;
	}

	r = m_forest->NewNode(k);

	// Resolve every operand index to its node once, up front.
	// (Runtime-sized arrays like this one are a compiler extension in C++.)
	Node* nodes[size];

	for (unsigned int i=0; i < size; ++i) {
		nodes[i] = &m_forest->FDDL_NODE(k,p[i]);
	}

	// index[i] is the read cursor into operand i's arc list.
	arc_idx index[size];
	for (unsigned int i=0; i < size; ++i) {
		index[i] = 0;
	}
	bool done = false;

	// Each pass reads one entry from every operand that still has entries
	// left; the loop ends after a pass in which no operand had any.
	while (!done) {
		done = true;
		arc_idx idx[size];	
		node_idx val[size];
		for (unsigned int i=0; i < size; ++i) {
			if (index[i] < nodes[i]->size() && nodes[i]->sparse()){
				// Sparse node: the arc position is stored next to the arc.
				idx[i] = m_forest->SPARSE_INDEX(k, nodes[i], index[i]);
				val[i] = m_forest->SPARSE_ARC(k, nodes[i], index[i]);
				++index[i];
				done = false;
			}
			else if (index[i] < nodes[i]->size()){
				// Full node: the cursor itself is the arc position.
				idx[i] = index[i];
				val[i] = m_forest->FULL_ARC(k, nodes[i], index[i]);
				++index[i];
				done = false;
			}
			else {
				// Operand exhausted.
				idx[i] = -1;
				val[i] = 0;
			}
		}
		// NOTE(review): every cursor was advanced above, including those of
		// operands whose entry is not selected below - confirm this is intended.

		// Select the operand with the smallest arc position among entries
		// with val >= 0, starting from operand 0.
		arc_idx mindx = 0;
		for (unsigned int i=1; i < size; ++i) {
			if (val[i] >= 0 && idx[i] < idx[mindx]) {
				mindx = i;
			}
		}
		// Operands sitting at a different arc position contribute 0.
		// NOTE(review): this starts at 1, so val[0] is never cleared even
		// when idx[0] differs from idx[mindx] - confirm.
		for (unsigned int i=1; i < size; ++i) {
			if (idx[i] != idx[mindx])
				val[i] = 0;
		}
		// NOTE(review): arc position 0 is skipped while the exhausted marker
		// -1 is not - confirm the intended condition.
		if (idx[mindx] != 0) 
			m_forest->SetArc(k, r, idx[mindx], apply(k-1, val, size));
	}
	r = m_forest->CheckIn(k, r);
	m_cache[k]->add(k, p, size, r);
	return r;
}
コード例 #5
0
ファイル: mat-elim-kaapi.c プロジェクト: ederc/pLA
/*
 * A - recursive driver over the index ranges k1..k2, i1..i2 and j1..j2 of M,
 * with every sub-step issued through kaapic_spawn().
 *
 * Returns immediately when i2 <= k1 or j2 <= k1.  When `size` is at most
 * `blocksize` the work is delegated to base_case().  Otherwise `size` is
 * halved, the ranges are split at their integer midpoints and the steps are
 * issued in this order, each followed by kaapic_sync():
 *   1. A  on the upper-left quadrant, lower k half   (forward step)
 *   2. B1 on the upper-right and C1 on the lower-left quadrant, spawned
 *      together between kaapic_begin_parallel() / kaapic_end_parallel()
 *   3. D1 on the lower-right quadrant, lower k half
 *   4. A  on the lower-right quadrant, upper k half  (backward step)
 *
 * Every spawn passes 14 argument descriptors of the form
 * (mode, element type, element count, value), in the parameter order of the
 * spawned function.  M, prime and neg_inv_piv are `unsigned int` data and are
 * therefore tagged KAAPIC_TYPE_UINT32, like all other `unsigned int`
 * arguments.
 *
 * NOTE(review): neg_inv_piv is a pointer parameter, so
 * sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]) is the pointer size divided by
 * the element size, not the length of the array.  The real length is not
 * visible from this function; replace the expression once it is known.
 */
void A( unsigned int *M, const unsigned int k1, const unsigned int k2,
        const unsigned int i1, const unsigned int i2,
        const unsigned int j1, const unsigned int j2,
        const unsigned int rows, const unsigned int cols,
        unsigned int size, unsigned int prime,
        unsigned int *neg_inv_piv, unsigned int blocksize, int thrds) {
  if (i2 <= k1 || j2 <= k1)
    return;

  if (size <= blocksize) {
    base_case (M, k1, i1, j1, rows, cols, size, prime, neg_inv_piv);
  } else {
    size = size / 2;

    unsigned int km = (k1+k2) / 2;
    unsigned int im = (i1+i2) / 2;
    unsigned int jm = (j1+j2) / 2;

    kaapic_spawn_attr_t attr;

    kaapic_spawn_attr_init(&attr);

    // forward step: A on (k1..km, i1..im, j1..jm)
    kaapic_spawn(&attr, 14, A,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32,
            sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);

    kaapic_sync();

    // parallel - start
    // B1 on (k1..km, i1..im, jm+1..j2) and C1 on (k1..km, im+1..i2, j1..jm)
    // are spawned back to back and synchronised together.
    kaapic_begin_parallel(KAAPIC_FLAG_DEFAULT);
    kaapic_spawn(&attr, 14, B1,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32,
            sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);
    // This spawn passes a null attribute pointer instead of &attr.
    kaapic_spawn(0, 14, C1,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32,
            sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);
    kaapic_sync();
    kaapic_end_parallel(KAAPIC_FLAG_DEFAULT);
    // parallel - end

    // D1 on (k1..km, im+1..i2, jm+1..j2)
    kaapic_spawn(&attr, 14, D1,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32,
            sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);

    kaapic_sync();

    // backward step: A on (km+1..k2, im+1..i2, jm+1..j2)
    kaapic_spawn(&attr, 14, A,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT32,
            sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);

    kaapic_sync();
  }
}