示例#1
0
/**
 * Recursive step "D1" of the blocked update on the buffer M.
 *
 * The call covers the inclusive index ranges [k1, k2], [i1, i2] and [j1, j2].
 * A block whose `size` does not exceed `blocksize` is handed to base_case().
 * A larger block is split at the midpoint of each range and handled in two
 * passes: first with the lower half of the k range, then with the upper half.
 * Each pass spawns one OpenMP task per (i, j) quadrant and waits for all four
 * of them before going on.
 *
 * Returns immediately when i2 <= k1 or j2 <= k1.
 *
 * @param M            buffer forwarded to base_case() and to the recursion.
 * @param rows, cols   forwarded unchanged to base_case().
 * @param size         current block size; halved on every split.
 * @param prime        forwarded unchanged to base_case().
 * @param neg_inv_piv  forwarded unchanged to base_case().
 * @param blocksize    threshold at or below which base_case() is used.
 * @param thrds        only forwarded to the recursive calls.
 */
void D1(unsigned int *M, const unsigned int k1, const unsigned int k2,
        const unsigned int i1, const unsigned int i2,
        const unsigned int j1, const unsigned int j2,
        const unsigned int rows, const unsigned int cols,
        unsigned int size, unsigned int prime,
        unsigned int *neg_inv_piv, unsigned int blocksize, int thrds) {
  if (i2 <= k1 || j2 <= k1)
    return;

  if (size <= blocksize) {
    base_case(M, k1, i1, j1, rows, cols, size, prime, neg_inv_piv);
    return;
  }

  const unsigned int half  = size / 2;
  const unsigned int k_mid = (k1 + k2) / 2;
  const unsigned int i_mid = (i1 + i2) / 2;
  const unsigned int j_mid = (j1 + j2) / 2;

  // Pass 0 works with k in [k1, k_mid], pass 1 with k in [k_mid+1, k2].
  for (int pass = 0; pass < 2; ++pass) {
    const unsigned int k_from = (pass == 0) ? k1 : k_mid + 1;
    const unsigned int k_to   = (pass == 0) ? k_mid : k2;

    // The four quadrants of one pass are spawned as separate tasks.
#pragma omp task mergeable
    D1(M, k_from, k_to, i1, i_mid, j1, j_mid, rows, cols, half,
       prime, neg_inv_piv, blocksize, thrds);            // X11
#pragma omp task mergeable
    D1(M, k_from, k_to, i1, i_mid, j_mid + 1, j2, rows, cols, half,
       prime, neg_inv_piv, blocksize, thrds);            // X12
#pragma omp task mergeable
    D1(M, k_from, k_to, i_mid + 1, i2, j1, j_mid, rows, cols, half,
       prime, neg_inv_piv, blocksize, thrds);            // X21
#pragma omp task mergeable
    D1(M, k_from, k_to, i_mid + 1, i2, j_mid + 1, j2, rows, cols, half,
       prime, neg_inv_piv, blocksize, thrds);            // X22

    // All four quadrant tasks must finish before the next pass starts.
#pragma omp taskwait
  }
}
    /**
     * Builds the structure for the text returned by parse_text(text_file),
     * consuming it in blocks of at most _block_size characters.
     *
     * The first block becomes _old_block and is processed by base_case().
     * Every following block is loaded into _new_block and run through
     * compute_rank_long(), compute_rank_short() and compute_psi(); afterwards
     * the psi structures are swapped, _new_psi is reset and the block just
     * processed becomes _old_block. When all blocks are done the result is
     * moved into _new_psi and sample_sa() / sample_isa() are called.
     *
     * _size accumulates the number of characters processed so far.
     *
     * @param text_file  argument handed to parse_text().
     */
    void construct(std::string text_file) {
        _text = parse_text(text_file);

        const int64_t text_size = _text.size();
        int64_t remaining_text_size = text_size;
        int64_t block_first = 0;
        int64_t block_last = 0;
        int64_t n;  // number of blocks that follow the first one

        _size = 0;
        _wm = new int64_t[(_block_size + 1) * 4];

        if (text_size > _block_size) {
            // More than one block: the first one is a full block.
            _current_block_size = _block_size;
            remaining_text_size -= _current_block_size;
            n = std::ceil(static_cast<double>(remaining_text_size) / _block_size);
            block_last = std::min(block_first + _current_block_size - 1, text_size);
        } else {
            // The whole text fits into the first block.
            n = 0;
            _current_block_size = text_size;
            remaining_text_size = 0;
        }

        // First block.
        _old_block = _text.substr(0, _current_block_size);
        _size += _current_block_size;
        base_case();
        block_first = block_last + 1;

        // Remaining blocks, one per iteration.
        for (int64_t iteration = 1; iteration <= n; ++iteration) {
            std::cout << "Iteration " << iteration << " of " << n << "\n";

            block_last = std::min(block_first + _block_size - 1, text_size - 1);
            const int64_t block_length = block_last - block_first + 1;
            _new_block = _text.substr(block_first, block_length);
            _current_block_size = std::min(block_length, remaining_text_size);
            _size += _current_block_size;  // grow the total by this block

            compute_rank_long();
            compute_rank_short();
            compute_psi();

            // Advance to the next block and make the fresh results the "old" ones.
            block_first = block_last + 1;
            remaining_text_size -= _current_block_size;
            _old_psi.swap(_new_psi);
            _new_psi.reset();
            _old_block = std::move(_new_block);
        }

        _new_psi = std::move(_old_psi);
        sample_sa();
        sample_isa();
    }
示例#3
0
/**
 * Recursive step "A" of the blocked update on the buffer M.
 *
 * The call covers the inclusive index ranges [k1, k2], [i1, i2] and [j1, j2].
 * A block whose `size` does not exceed `blocksize` is handed to base_case().
 * A larger block is split at the midpoint of each range and processed as
 *
 *   1. A  on the upper-left quadrant with the lower k half   (forward step)
 *   2. B1 on the upper-right and C1 on the lower-left quadrant, concurrently
 *   3. D1 on the lower-right quadrant
 *   4. A  on the lower-right quadrant with the upper k half  (backward step)
 *
 * Returns immediately when i2 <= k1 or j2 <= k1.
 *
 * Steps 2 and 3 run inside an OpenMP parallel region of `thrds` threads.
 * The statements of a parallel region are executed by every thread of the
 * team, so the task-generating code is wrapped in a `single` construct:
 * exactly one thread creates the B1/C1 tasks and calls D1, while the other
 * threads of the team execute the generated tasks. Without it each thread
 * would spawn its own B1 and C1 on the same buffer.
 *
 * The backward step is issued after the region has ended, like the forward
 * step is issued before it, so that the recursive call opens its own
 * parallel region instead of a nested one.
 *
 * @param M            buffer forwarded to base_case() and to the sub-steps.
 * @param rows, cols   forwarded unchanged to base_case().
 * @param size         current block size; halved on every split.
 * @param prime        forwarded unchanged to base_case().
 * @param neg_inv_piv  forwarded unchanged to base_case().
 * @param blocksize    threshold at or below which base_case() is used.
 * @param thrds        team size requested for the parallel region.
 */
void A( unsigned int *M, const unsigned int k1, const unsigned int k2,
        const unsigned int i1, const unsigned int i2,
        const unsigned int j1, const unsigned int j2,
        const unsigned int rows, const unsigned int cols,
        unsigned int size, unsigned int prime,
        unsigned int *neg_inv_piv, unsigned int blocksize, int thrds) {
  if (i2 <= k1 || j2 <= k1)
    return;

  if (size <= blocksize) {
    base_case (M, k1, i1, j1, rows, cols, size, prime, neg_inv_piv);
    return;
  }

  size = size / 2;

  const unsigned int km = (k1+k2) / 2;
  const unsigned int im = (i1+i2) / 2;
  const unsigned int jm = (j1+j2) / 2;

  // forward step
  A(M, k1, km, i1, im, j1, jm, rows, cols, size,
    prime, neg_inv_piv, blocksize, thrds);

#pragma omp parallel num_threads(thrds)
  {
    // One thread generates the work; the rest of the team picks up tasks
    // while waiting at the barrier that ends the single construct.
#pragma omp single
    {
#pragma omp task mergeable
      B1( M, k1, km, i1, im, jm+1, j2, rows, cols, size,
          prime, neg_inv_piv, blocksize, thrds);
#pragma omp task mergeable
      C1( M, k1, km, im+1, i2, j1, jm, rows, cols, size,
          prime, neg_inv_piv, blocksize, thrds);
      // B1 and C1 must both be finished before D1 starts.
#pragma omp taskwait

      D1( M, k1, km, im+1, i2, jm+1, j2, rows, cols, size,
          prime, neg_inv_piv, blocksize, thrds);
    }
  }

  // backward step
  A(M, km+1, k2, im+1, i2, jm+1, j2, rows, cols, size,
    prime, neg_inv_piv, blocksize, thrds);
}
示例#4
0
/**
 * Applies this operation to `size` operand nodes at level `k` and returns the
 * index of the resulting node.
 *
 * Level 0 is delegated to base_case(). For higher levels the per-level cache
 * is consulted first; on a miss a new node is created, its arcs are filled by
 * recursive calls at level k-1, and the node is checked in and cached.
 *
 * @param k     level of the operand nodes.
 * @param p     array of `size` operand node indices.
 * @param size  number of entries in `p`.
 * @return index of the result node.
 * @throws OperationException when k is greater than the forest's top level.
 */
node_idx ArrayOperation::apply(const level k, const node_idx p[], const unsigned int size)
{
	node_idx r = 0;
	if (k > m_forest->top_level())
		throw OperationException("Level out of range.\n");

	// Level 0 is handled by base_case().
	if (k == 0) {
		return base_case(p, size);
	}

	// Cache lookup: a non-negative result is treated as a hit.
	r = m_cache[k]->hit(k, p, size);
	if (r >= 0) {
		// If the cached node is flagged as deleted, check it in again before returning it.
		if (m_forest->FDDL_NODE(k, r).deleted())
			r = m_forest->CheckIn(k, r);
		return r;
	}

	// Cache miss: build a fresh result node at this level.
	r = m_forest->NewNode(k);

	// NOTE(review): maxsize is never read after this loop, and the loop looks
	// up node i rather than node p[i] -- confirm whether this is leftover code.
	int maxsize = 0;
	for (unsigned int i=0; i < size; ++i) {
		Node* node = &m_forest->FDDL_NODE(k,i);
		if (node->m_size > maxsize)
			maxsize = node->m_size;
	}

	// `size` is a runtime value, so this and the arrays below are
	// variable-length arrays (a compiler extension in C++).
	Node* nodes[size];

	// Resolve every operand index to its node.
	for (unsigned int i=0; i < size; ++i) {
		nodes[i] = &m_forest->FDDL_NODE(k,p[i]);
	}

	// index[i] is the read cursor into operand i's arcs.
	arc_idx index[size];
	for (unsigned int i=0; i < size; ++i) {
		index[i] = 0;
	}
	bool done = false;

	// One pass per loop iteration; the loop ends after a pass in which no
	// operand had an arc left to read.
	while (!done) {
		done = true;
		arc_idx idx[size];	
		node_idx val[size];
		// Read the next arc (idx, val) of every operand that still has one and
		// advance its cursor; exhausted operands get idx -1 and val 0.
		// NOTE(review): every cursor advances on each pass, including those of
		// operands whose idx is not the one selected below -- verify that
		// sparse operands with differing indices are handled as intended.
		for (unsigned int i=0; i < size; ++i) {
			if (index[i] < nodes[i]->size() && nodes[i]->sparse()){
				idx[i] = m_forest->SPARSE_INDEX(k, nodes[i], index[i]);
				val[i] = m_forest->SPARSE_ARC(k, nodes[i], index[i]);
				++index[i];
				done = false;
			}
			else if (index[i] < nodes[i]->size()){
				idx[i] = index[i];
				val[i] = m_forest->FULL_ARC(k, nodes[i], index[i]);
				++index[i];
				done = false;
			}
			else {
				idx[i] = -1;
				val[i] = 0;
			}
		}
		// Pick the operand position with the smallest idx, starting from
		// position 0 and considering positions whose val is non-negative.
		// NOTE(review): exhausted operands carry val 0 and idx -1, so they pass
		// the `val[i] >= 0` test; if arc_idx is signed they win the minimum --
		// confirm whether `idx[i] >= 0` was intended.
		arc_idx mindx = 0;
		for (unsigned int i=1; i < size; ++i) {
			if (val[i] >= 0 && idx[i] < idx[mindx]) {
				mindx = i;
			}
		}
		// Operands whose idx differs from the selected one contribute 0.
		// NOTE(review): the loop starts at 1, so val[0] is never cleared here.
		for (unsigned int i=1; i < size; ++i) {
			if (idx[i] != idx[mindx])
				val[i] = 0;
		}
		// Recurse one level down on the gathered arc values and store the result.
		// NOTE(review): the test skips arc index 0 but lets idx -1 through,
		// including on the final pass where every operand is exhausted -- confirm.
		if (idx[mindx] != 0) 
			m_forest->SetArc(k, r, idx[mindx], apply(k-1, val, size));
	}
	// Check the finished node in and remember the result for these operands.
	r = m_forest->CheckIn(k, r);
	m_cache[k]->add(k, p, size, r);
	return r;
}
示例#5
0
void A( unsigned int *M, const unsigned int k1, const unsigned int k2,
        const unsigned int i1, const unsigned int i2,
		    const unsigned int j1, const unsigned int j2,
		    const unsigned int rows, const unsigned int cols,
        unsigned int size, unsigned int prime,
        unsigned int *neg_inv_piv, unsigned int blocksize, int thrds) {
  if (i2 <= k1 || j2 <= k1)
    return;
  //
  //if (size <= 2) {
  if (size <= blocksize) {
    base_case (M, k1, i1, j1, rows, cols, size, prime, neg_inv_piv);
  } else {
    size = size / 2;

    unsigned int km = (k1+k2) / 2 ;
    unsigned int im = (i1+i2) / 2;
    unsigned int jm = (j1+j2) / 2;

    kaapic_spawn_attr_t attr;

    kaapic_spawn_attr_init(&attr);

    // forward step
    /*
    A(M, k1, km, i1, im, j1, jm, rows, cols, size,
      prime, neg_inv_piv, thrds, blocksize);
    */
    kaapic_spawn(&attr, 14, A,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT64, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        //KAAPIC_MODE_RW, KAAPIC_TYPE_INT, 4, neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);

    kaapic_sync();

    // parallel - start
    kaapic_begin_parallel(KAAPIC_FLAG_DEFAULT);
    /*
    B1( M, k1, km, i1, im, jm+1, j2, rows, cols, size,
        prime, neg_inv_piv, thrds, blocksize);
    C1( M, k1, km, im+1, i2, j1, jm, rows, cols, size,
        prime, neg_inv_piv, thrds, blocksize);
    */
    kaapic_spawn(&attr, 14, B1,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT64, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        //KAAPIC_MODE_RW, KAAPIC_TYPE_INT, 4, neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);
    kaapic_spawn(0, 14, C1,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT64, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        //KAAPIC_MODE_RW, KAAPIC_TYPE_INT, 4, neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);
    kaapic_sync();
    kaapic_end_parallel(KAAPIC_FLAG_DEFAULT);
    // parallel - end

    /*
    D1( M, k1, km, im+1, i2, jm+1, j2, rows, cols, size,
        prime, neg_inv_piv, thrds, blocksize);
    */
    kaapic_spawn(&attr, 14, D1,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT64, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        //KAAPIC_MODE_RW, KAAPIC_TYPE_INT, 4, neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);

    kaapic_sync();

    // backward step

    /*
    A(M, km+1, k2, im+1, i2, jm+1, j2, rows, cols, size,
      prime, neg_inv_piv, thrds, blocksize);
    */
    kaapic_spawn(&attr, 14, A,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, rows*cols, M,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, km+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, k2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, im+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, i2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, jm+1,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, j2,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, rows,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, cols,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, size,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT64, 1, prime,
        KAAPIC_MODE_RW, KAAPIC_TYPE_UINT64, sizeof(neg_inv_piv)/sizeof(neg_inv_piv[0]), neg_inv_piv,
        //KAAPIC_MODE_RW, KAAPIC_TYPE_INT, 4, neg_inv_piv,
        KAAPIC_MODE_V, KAAPIC_TYPE_UINT32, 1, blocksize,
        KAAPIC_MODE_V, KAAPIC_TYPE_INT, 1, thrds);

    kaapic_sync();
  }
}