Exemple #1
0
/*
 * Unpack a length-prefixed array of doubles from buffer.
 *
 * A 32-bit element count is read into *size_val, *valp is allocated with
 * xmalloc_nz() and then filled by one unpackdouble() call per element.
 *
 * valp     - OUT: array allocated with xmalloc_nz() on success,
 *            NULL whenever SLURM_ERROR is returned
 * size_val - OUT: element count read from buffer
 * buffer   - buffer to unpack from
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR if the count or any element could not
 * be unpacked.
 */
int unpackdouble_array(double **valp, uint32_t* size_val, Buf buffer)
{
	uint32_t i = 0;

	/* Give the caller a well-defined pointer on every error path. */
	*valp = NULL;

	if (unpack32(size_val, buffer))
		return SLURM_ERROR;

	*valp = xmalloc_nz((*size_val) * sizeof(double));
	for (i = 0; i < *size_val; i++) {
		if (unpackdouble((*valp) + i, buffer)) {
			/*
			 * Release the partially filled array instead of
			 * leaking it; xfree() also resets *valp to NULL.
			 */
			xfree(*valp);
			return SLURM_ERROR;
		}
	}
	return SLURM_SUCCESS;
}
Exemple #2
0
        /**
         * @brief       Perform inverse DWT (periodic boundaries) on 3D data.
         *
         * Copies wc into img and then reconstructs img in place. Each of the
         * (_max_level - _min_level) levels runs three passes over lines of
         * the volume, in the order third, second, first dimension, and then
         * doubles the working side lengths sl1, sl2 and sl3.
         *
         * The passes are OpenMP worksharing loops inside one parallel region.
         * Every thread works in its own slice of the member buffer _temp,
         * selected by its thread number.
         *
         * @param  wc   Wavelet representation of 3D data.
         * @param  img  Reconstructed signal.
         */
        void
        idpwt3      (const Matrix <T> & wc, Matrix <T> & img)
        {

            // assign dwt to result image; everything below operates on img in place
            img = wc;

# pragma omp parallel default (shared) num_threads (_num_threads)
            {

                // declared inside the parallel region, so each thread has its own copies
                T * wcplo, * wcphi, * templo, * temphi, * temptop, * tmp;

                size_t stride;
                // working side lengths, starting from the members _sl*_scale
                int sl1 = _sl1_scale,
                    sl2 = _sl2_scale,
                    sl3 = _sl3_scale;
                const int t_num = omp_get_thread_num ();

                // loop over levels of backwards DWT
                // (j only counts iterations; the per-level state is sl1 / sl2 / sl3)
                for (int j = _min_level; j < _max_level; j++)
                {

                    // per-thread scratch layout for the pass along the third dimension:
                    // 6 * sl3 elements per thread, with tmp at offset 0, templo at
                    // 2 * sl3, temphi at 3 * sl3 and temptop at 4 * sl3
                    stride = 6 * sl3 * t_num;
                    tmp = & _temp [stride];
                    templo  = & _temp [2 * sl3 + stride];
                    temphi  = & _temp [3 * sl3 + stride];
                    temptop = & _temp [4 * sl3 + stride];

# pragma omp for schedule (OMP_SCHEDULE)
                    // loop over lines along third dimension ('third') of result image
                    // (no nowait clause: threads synchronise before the next pass)
                    for (int c1_loc = 0; c1_loc < 2 * sl1 * 2 * sl2; c1_loc++)
                    {

                        // offset of the line's first element inside img:
                        // (c1_loc % (2 * sl1)) plus (c1_loc / (2 * sl1)) * _sl1
                        int c1_glob = (c1_loc % (2 * sl1)) + (c1_loc / (2 * sl1)) * _sl1;

                        // copy lowpass part of current line to temporary memory
                        // (helper not shown here; presumably gathers sl3 elements
                        //  spaced _ld12 apart -- confirm against its definition)
                        unpackdouble (& img [c1_glob], sl3, _ld12, 0, templo);

                        // copy highpass part of current line to temporary memory;
                        // it starts sl3 * _ld12 elements after the lowpass part
                        unpackdouble (& img [c1_glob + sl3 * _ld12], sl3, _ld12, 0, temphi);

                        // perform lowpass reconstruction
                        uplo (templo, sl3, tmp);
                        // perform highpass reconstruction
                        uphi (temphi, sl3, temptop);

                        // fusion of reconstruction parts (2 * sl3 elements, result in tmp)
                        adddouble (tmp, temptop, sl3 * 2, tmp);

                        // write back reconstructed line
                        packdouble (tmp, sl3 * 2, _ld12, 0, & img [c1_glob]);

                    } // loop over lines along third dimension of result image

                    // per-thread scratch layout for the pass along the second dimension:
                    // same arrangement as above, sized by sl2 instead of sl3
                    stride = 6 * sl2 * t_num;
                    tmp = & _temp [stride];
                    templo  = & _temp [2 * sl2 + stride];
                    temphi  = & _temp [3 * sl2 + stride];
                    temptop = & _temp [4 * sl2 + stride];

# pragma omp for schedule (OMP_SCHEDULE)
                    // loop over lines along second dimension ('rows') of result image
                    for (int c1_loc = 0; c1_loc < 2 * sl1 * 2 * sl3; c1_loc++)
                    {

                        // base offset advances by _sl1 * _sl2 for every 2 * sl1 lines;
                        // the position within that group (c1_loc % (2 * sl1)) is
                        // handed to the pack / unpack helpers as a separate argument
                        int c1_glob = (c1_loc / (2 * sl1)) * _sl1 * _sl2;

                        // copy lowpass part of current line to temporary memory
                        unpackdouble (& img [c1_glob], sl2, _sl1, c1_loc % (2 * sl1), templo);

                        // copy highpass part of current line to temporary memory;
                        // it starts sl2 * _sl1 elements after the lowpass part
                        unpackdouble (& img [c1_glob + sl2 * _sl1], sl2, _sl1, c1_loc % (2 * sl1), temphi);

                        // perform lowpass reconstruction
                        uplo (templo, sl2, tmp);
                        // perform highpass reconstruction
                        uphi (temphi, sl2, temptop);

                        // fusion of reconstruction parts (2 * sl2 elements, result in tmp)
                        adddouble (tmp, temptop, sl2 * 2, tmp);

                        // write back reconstructed line
                        packdouble (tmp, sl2 * 2, _sl1, c1_loc % (2 * sl1), & img [c1_glob]);

                    } // loop over lines along second dimension of result image

                    // per-thread scratch layout for the pass along the first dimension:
                    // 5 * sl1 elements per thread, with tmp at offset 0, templo at sl1
                    // and temphi at 3 * sl1
                    stride = 5 * sl1 * t_num;
                    tmp = & _temp [stride];
                    templo = & _temp [    sl1 + stride];
                    temphi = & _temp [3 * sl1 + stride];

# pragma omp for schedule (OMP_SCHEDULE)
                    // loop  over lines along first dimension ('columns') of result image
                    for (int c2_loc = 0; c2_loc < 2 * sl2 * 2 * sl3; c2_loc++)
                    {

                        // offset of the line's first element inside img
                        int c2_glob = (c2_loc / (2 * sl2)) * _sl2 * _sl1 + (c2_loc % (2 * sl2)) * _sl1;

                        // assign address of current line's lowpass part
                        wcplo = & img [c2_glob];
                        // assign address of current line's highpass part (sl1 elements further on)
                        wcphi = & img [c2_glob + sl1];

                        // copy lowpass part to temporary memory
                        // NOTE(review): tmp is not read again in this loop body -- uplo
                        // below takes wcplo directly -- so this copy looks unused here;
                        // confirm before removing it.
                        copydouble (wcplo, tmp, sl1);

                        // perform lowpass reconstruction
                        uplo (wcplo, sl1, templo);
                        // perform highpass reconstruction
                        uphi (wcphi, sl1, temphi);

                        // combine reconstructed parts and write back to current line
                        adddouble (templo, temphi, sl1 * 2, wcplo);

                    } // loop over lines along first dimension ('columns') of result image

                    // update current row / column size: the next level works on
                    // lines twice as long in every dimension
                    sl2 *= 2;
                    sl1 *= 2;
                    sl3 *= 2;

                } // loop over levels of backwards DWT

            } // omp parallel

        }
/*
 * Pack the unpacked operand *pu into the representation selected by type
 * and write it to the simulated register(s) starting at n.
 *
 * The register write is skipped whenever fp_current_exceptions and
 * fp_fsrtem have a bit in common after packing.  Destinations wider than
 * 32 bits have an odd register number re-encoded as (n & 0x1e) | 0x20
 * before the write.
 */
void
_fp_pack(
	fp_simd_type	*pfpsd,	/* Pointer to simulator data */
	unpacked	*pu,	/* unpacked operand */
	uint_t		n,	/* register where datum starts */
	enum fp_op_type type)	/* type of datum */

{
	switch (type) {
	case fp_op_int32: {
		int32_t		word;

		packint32(pfpsd, pu, &word);
		if (pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)
			break;		/* leave the register untouched */
		pfpsd->fp_current_write_freg(&word, n, pfpsd);
		break;
	}
	case fp_op_int64: {
		int64_t		dword;

		packint64(pfpsd, pu, &dword);
		if (pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)
			break;		/* leave the register untouched */
		if (n & 0x1)		/* fix register encoding */
			n = (n & 0x1e) | 0x20;
		pfpsd->fp_current_write_dreg(&dword, DOUBLE(n), pfpsd);
		break;
	}
	case fp_op_single: {
		single_type	sgl;

		packsingle(pfpsd, pu, &sgl);
		if (pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)
			break;		/* leave the register untouched */
		pfpsd->fp_current_write_freg(&sgl, n, pfpsd);
		break;
	}
	case fp_op_double: {
		/* the two packed halves are written out as one 64-bit value */
		union {
			double_type	x[2];
			uint32_t	y[2];
			uint64_t	ll;
		} dbl;

		packdouble(pfpsd, pu, &dbl.x[0], &dbl.y[1]);
		if (pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)
			break;		/* leave the register untouched */
		if (n & 0x1)		/* fix register encoding */
			n = (n & 0x1e) | 0x20;
		pfpsd->fp_current_write_dreg(&dbl.ll, DOUBLE(n), pfpsd);
		break;
	}
	case fp_op_extended: {
		/* the four packed words are written out as two 64-bit values */
		union {
			extended_type	x;
			uint32_t	y[4];
			uint64_t	ll[2];
		} quad;
		unpacked	narrowed;

		/*
		 * Implement extended rounding precision mode: for the
		 * precisions listed here the operand is rounded before it
		 * is packed as an extended value.
		 */
		switch (pfpsd->fp_precision) {
		case fp_single: {
			single_type	as_single;

			/* round trip through single into the local copy */
			packsingle(pfpsd, pu, &as_single);
			pu = &narrowed;
			unpacksingle(pfpsd, pu, as_single);
			break;
		}
		case fp_double: {
			double_type	as_double;
			uint_t		as_double_rest;

			/* round trip through double into the local copy */
			packdouble(pfpsd, pu, &as_double, &as_double_rest);
			pu = &narrowed;
			unpackdouble(pfpsd, pu, as_double, as_double_rest);
			break;
		}
		case fp_precision_3: {	/* rounded to 64 bits */
			int	adjust = pu->exponent + EXTENDED_BIAS;

			adjust = (adjust >= 0) ?
			    (113 - 64) : (113 - 64 - adjust);
			fpu_rightshift(pu, 113 - 64);
			round(pfpsd, pu);
			pu->sticky = pu->rounded = 0;
			pu->exponent += adjust;
			fpu_normalize(pu);
			break;
		}
		}

		packextended(pfpsd, pu, &quad.x, &quad.y[1],
		    &quad.y[2], &quad.y[3]);
		if (pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)
			break;		/* leave the registers untouched */
		if (n & 0x1)		/* fix register encoding */
			n = (n & 0x1e) | 0x20;
		pfpsd->fp_current_write_dreg(&quad.ll[0], QUAD_E(n), pfpsd);
		pfpsd->fp_current_write_dreg(&quad.ll[1], QUAD_F(n), pfpsd);
		break;
	}
	}
}
Exemple #4
0
        /**
         * @brief       Perform forward DWT (periodic boundaries) on 3D data.
         *
         * Copies sig into res and then decomposes res in place. Each of the
         * (_max_level - _min_level) levels runs three passes over lines of
         * the volume, in the order first, second, third dimension, and then
         * halves the working side lengths sl1, sl2 and sl3.
         *
         * The passes are OpenMP worksharing loops inside one parallel region.
         * Every thread works in its own slice of the member buffer _temp,
         * selected by its thread number.
         *
         * @param  sig  Signal to be transformed.
         * @param  res  Decomposed signal.
         */
        void
        dpwt3       (const Matrix <T> & sig, Matrix <T> & res)
        {

            // assign signal to result matrix; everything below operates on res in place
            res = sig;

# pragma omp parallel default (shared), num_threads (_num_threads)
            {

                // declared inside the parallel region, so each thread has its own copies
                T * wcplo, * wcphi, * templo, * temphi, * tmp;

                size_t stride;
                // working side lengths, starting from the full extents
                int sl1 = _sl1,
                    sl2 = _sl2,
                    sl3 = _sl3;
                const int t_num = omp_get_thread_num ();

                // loop over levels of DWT
                // (j only counts iterations; the per-level state is sl1 / sl2 / sl3)
                for (int j = (_max_level-1); j >= _min_level; --j)
                {

                    // per-thread scratch for the pass along the first dimension:
                    // sl1 elements per thread
                    stride = sl1 * t_num;
                    // update thread's temporary memory address
                    tmp = & _temp [stride];

# pragma omp for schedule (OMP_SCHEDULE)
                    // loop over lines along first dimension ('columns') of image
                    for (int c2_loc = 0; c2_loc < sl2 * sl3; c2_loc++)
                    {

                        // offset of the line's first element inside res
                        int c2_glob = (c2_loc / sl2) * _sl1 * _sl2 + (c2_loc % sl2) * _sl1;

                        // access to lowpass part of DWT
                        wcplo = & res [c2_glob];
                        // access to highpass part of DWT (second half of the line)
                        wcphi = & res [c2_glob + sl1 / 2];

                        // copy part of image to _temp memory, because the filters
                        // below write their output over the source line
                        copydouble (wcplo, tmp, sl1);

                        // apply low pass filter on current line and write to result matrix
                        downlo (tmp, sl1, wcplo);
                        // apply high pass filter on current line and write to result matrix
                        downhi (tmp, sl1, wcphi);

                    } // loop over lines along first dimension

                    // per-thread scratch layout for the pass along the second dimension:
                    // 2 * sl2 elements per thread, with tmp at offset 0, templo at sl2
                    // and temphi at sl2 + sl2 / 2
                    stride = 2 * sl2 * t_num;
                    // update thread's temporary memory address
                    tmp = & _temp [stride];
                    templo = & _temp [sl2 + stride];
                    // integer arithmetic on purpose: a floating-point subscript
                    // (1.5 * sl2) relies on an implicit double -> integer conversion
                    temphi = & _temp [sl2 + sl2 / 2 + stride];

# pragma omp for schedule (OMP_SCHEDULE)
                    // loop over lines along second dimension ('rows') of image
                    for (int c1_loc = 0; c1_loc < sl1 * sl3; c1_loc++)
                    {

                        // base offset advances by _sl1 * _sl2 for every sl1 lines; the
                        // position within that group (c1_loc % sl1) is handed to the
                        // pack / unpack helpers as a separate argument
                        int c1_glob = (c1_loc / sl1) * _sl1 * _sl2;

                        // copy c1-th line of image to temp_mem
                        unpackdouble (& res [c1_glob], sl2, _sl1, c1_loc % sl1, tmp);

                        // apply low pass filter on current line and write to _temp mem
                        downlo (tmp, sl2, templo);
                        // apply high pass filter on current line and write to _temp mem
                        downhi (tmp, sl2, temphi);

                        // write temp lowpass result to result matrix
                        packdouble (templo, sl2 / 2, _sl1, c1_loc % sl1, & res [c1_glob]);
                        // write temp highpass result to result matrix
                        packdouble (temphi, sl2 / 2, _sl1, c1_loc % sl1, & res [c1_glob + sl2 / 2 * _sl1]);

                    } // loop over lines along second dimension

                    // per-thread scratch layout for the pass along the third dimension:
                    // same arrangement as above, sized by sl3 instead of sl2
                    stride = 2 * sl3 * t_num;
                    // update thread's temporary memory address
                    tmp = & _temp [stride];
                    templo = & _temp [sl3 + stride];
                    // integer arithmetic on purpose, see the second-dimension pass
                    temphi = & _temp [sl3 + sl3 / 2 + stride];

# pragma omp for schedule (OMP_SCHEDULE)
                    // loop over lines along third dimension ('third') of image
                    for (int c1_loc = 0; c1_loc < sl1 * sl2; c1_loc++)
                    {

                        // offset of the line's first element inside res
                        int c1_glob = (c1_loc % sl1) + (c1_loc / sl1) * _sl1;

                        // copy current line of image to temp_mem
                        unpackdouble (& res [c1_glob], sl3, _ld12, 0, tmp);

                        // apply low pass filter on current line and write to _temp mem
                        downlo (tmp, sl3, templo);
                        // apply high pass filter on current line and write to _temp mem
                        downhi (tmp, sl3, temphi);

                        // write temp lowpass result to result matrix
                        packdouble (templo, sl3 / 2, _ld12, 0, & res [c1_glob]);
                        // write temp highpass result to result matrix
                        packdouble (temphi, sl3 / 2, _ld12, 0, & res [c1_glob + sl3 / 2 * _ld12]);

                    } // loop over lines along third dimension

                    // reduce dimensions for next level
                    sl1 /= 2;
                    sl2 /= 2;
                    sl3 /= 2;

                } // loop over levels of DWT

            } // omp parallel

        }