/*
 * Unpack a 32-bit element count followed by that many doubles from
 * "buffer" into a freshly allocated array.
 *
 * valp     - OUT: newly allocated array of *size_val doubles.
 *            Caller owns the memory; on error it is left allocated
 *            (and possibly partially filled).
 * size_val - OUT: number of elements unpacked.
 * buffer   - IN:  source pack buffer.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if the buffer runs short.
 */
int unpackdouble_array(double **valp, uint32_t *size_val, Buf buffer)
{
	if (unpack32(size_val, buffer))
		return SLURM_ERROR;

	*valp = xmalloc_nz((*size_val) * sizeof(double));

	for (uint32_t idx = 0; idx < *size_val; idx++) {
		if (unpackdouble(&(*valp)[idx], buffer))
			return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
/**
 * @brief   Perform inverse DWT (periodic boundaries) on 3D data.
 *
 * Reconstructs the signal level by level, from the coarsest retained
 * level (_min_level) back up to _max_level.  At each level the three
 * dimensions are reconstructed in turn (third dim, rows, columns); the
 * active extent (sl1, sl2, sl3) doubles after every level.
 *
 * @param wc   Wavelet presentation of 3D data.
 * @param img  Reconstructed signal (overwritten with the result).
 */
void idpwt3 (const Matrix <T> & wc, Matrix <T> & img)
{
  // assign dwt to result image; reconstruction then runs in place on img
  img = wc;
# pragma omp parallel default (shared) num_threads (_num_threads)
  {
    T * wcplo, * wcphi, * templo, * temphi, * temptop, * tmp;
    size_t stride;
    // active (already reconstructed) extent per dimension at current level
    int sl1 = _sl1_scale, sl2 = _sl2_scale, sl3 = _sl3_scale;
    const int t_num = omp_get_thread_num ();
    // loop over levels of backwards DWT
    for (int j = _min_level; j < _max_level; j++)
    {
      // per-thread scratch layout for pass 1 (third dimension):
      // tmp [0,2*sl3) | templo [2*sl3,3*sl3) | temphi [3*sl3,4*sl3) |
      // temptop [4*sl3,6*sl3) -- hence the 6*sl3 stride per thread.
      stride = 6 * sl3 * t_num;
      tmp     = & _temp [stride];
      templo  = & _temp [2 * sl3 + stride];
      temphi  = & _temp [3 * sl3 + stride];
      temptop = & _temp [4 * sl3 + stride];
# pragma omp for schedule (OMP_SCHEDULE)
      // loop over lines along third dimension ('third') of result image
      for (int c1_loc = 0; c1_loc < 2 * sl1 * 2 * sl2; c1_loc++)
      {
        // map the dense loop counter to the (strided) global line start
        int c1_glob = (c1_loc % (2 * sl1)) + (c1_loc / (2 * sl1)) * _sl1;
        // copy lowpass part of current line to temporary memory
        unpackdouble (& img [c1_glob], sl3, _ld12, 0, templo);
        // copy highpass part of current line to temporary memory
        unpackdouble (& img [c1_glob + sl3 * _ld12], sl3, _ld12, 0, temphi);
        // perform lowpass reconstruction (output length 2*sl3)
        uplo (templo, sl3, tmp);
        // perform highpass reconstruction
        uphi (temphi, sl3, temptop);
        // fusion of reconstruction parts
        adddouble (tmp, temptop, sl3 * 2, tmp);
        // write back reconstructed line
        packdouble (tmp, sl3 * 2, _ld12, 0, & img [c1_glob]);
      } // loop over lines along third dimension of result image
      // scratch layout for pass 2 (rows), same shape as pass 1 but sized sl2
      stride = 6 * sl2 * t_num;
      tmp     = & _temp [stride];
      templo  = & _temp [2 * sl2 + stride];
      temphi  = & _temp [3 * sl2 + stride];
      temptop = & _temp [4 * sl2 + stride];
# pragma omp for schedule (OMP_SCHEDULE)
      // loop over lines along second dimension ('rows') of result image
      for (int c1_loc = 0; c1_loc < 2 * sl1 * 2 * sl3; c1_loc++)
      {
        // start of the slab holding this row; in-slab offset is passed
        // to unpack/pack as c1_loc % (2 * sl1)
        int c1_glob = (c1_loc / (2 * sl1)) * _sl1 * _sl2;
        // copy lowpass part of current line to temporary memory
        unpackdouble (& img [c1_glob], sl2, _sl1, c1_loc % (2 * sl1), templo);
        // copy highpass part of current line to temporary memory
        unpackdouble (& img [c1_glob + sl2 * _sl1], sl2, _sl1, c1_loc % (2 * sl1), temphi);
        // perform lowpass reconstruction
        uplo (templo, sl2, tmp);
        // perform highpass reconstruction
        uphi (temphi, sl2, temptop);
        // fusion of reconstruction parts
        adddouble (tmp, temptop, sl2 * 2, tmp);
        // write back reconstructed line
        packdouble (tmp, sl2 * 2, _sl1, c1_loc % (2 * sl1), & img [c1_glob]);
      } // loop over lines along second dimension of result image
      // scratch layout for pass 3 (columns, contiguous in memory):
      // tmp [0,sl1) | templo [sl1,3*sl1) | temphi [3*sl1,5*sl1)
      // -- hence the 5*sl1 stride per thread.
      stride = 5 * sl1 * t_num;
      tmp    = & _temp [stride];
      templo = & _temp [ sl1 + stride];
      temphi = & _temp [3 * sl1 + stride];
# pragma omp for schedule (OMP_SCHEDULE)
      // loop over lines along first dimension ('columns') of result image
      for (int c2_loc = 0; c2_loc < 2 * sl2 * 2 * sl3; c2_loc++)
      {
        int c2_glob = (c2_loc / (2 * sl2)) * _sl2 * _sl1 + (c2_loc % (2 * sl2)) * _sl1;
        // assign address of current line's lowpass part
        wcplo = & img [c2_glob];
        // assign address of current line's highpass part
        wcphi = & img [c2_glob + sl1];
        // copy lowpass part to temporary memory
        // NOTE(review): tmp is filled here but never read in this loop --
        // uplo below reads wcplo directly.  Either this copy is vestigial,
        // or uplo was meant to consume tmp (cf. dpwt3, which filters from
        // the copy).  Verify against the filter implementations.
        copydouble (wcplo, tmp, sl1);
        // perform lowpass reconstruction
        uplo (wcplo, sl1, templo);
        // perform highpass reconstruction
        uphi (wcphi, sl1, temphi);
        // combine reconstructed parts and write back to current line
        // (overwrites both halves: 2*sl1 values starting at wcplo)
        adddouble (templo, temphi, sl1 * 2, wcplo);
      } // loop over lines along first dimension ('columns') of result image
      // update current row / column size: extent doubles per level
      sl2 *= 2;
      sl1 *= 2;
      sl3 *= 2;
    } // loop over levels of backwards DWT
  } // omp parallel
}
/*
 * Pack a simulated FP operation's unpacked result back into the
 * destination register(s), according to the datum type.  Registers are
 * only written when no trapping exception is pending, i.e. when none of
 * the current exceptions is enabled in the trap-enable mask (fsrtem).
 */
void _fp_pack(
	fp_simd_type	*pfpsd,	/* Pointer to simulator data */
	unpacked	*pu,	/* unpacked operand */
	uint_t		n,	/* register where datum starts */
	enum fp_op_type	type)	/* type of datum */
{
	switch (type) {
	case fp_op_int32:
		{
			int32_t x;

			packint32(pfpsd, pu, &x);
			/* write single (32-bit) register unless a trap is pending */
			if (!(pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem))
				pfpsd->fp_current_write_freg(&x, n, pfpsd);
			break;
		}
	case fp_op_int64:
		{
			int64_t x;

			packint64(pfpsd, pu, &x);
			/*
			 * fix register encoding: an odd double-register number
			 * is remapped into the upper register bank
			 * (presumably the SPARC V9 %d32+ encoding -- verify
			 * against the architecture manual).
			 */
			if ((n & 0x1) == 1)
				n = (n & 0x1e) | 0x20;
			if (!(pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem))
				pfpsd->fp_current_write_dreg(&x, DOUBLE(n), pfpsd);
			break;
		}
	case fp_op_single:
		{
			single_type x;

			packsingle(pfpsd, pu, &x);
			if (!(pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem))
				pfpsd->fp_current_write_freg(&x, n, pfpsd);
			break;
		}
	case fp_op_double:
		{
			/*
			 * Union punning: packdouble fills the two 32-bit
			 * halves (via x[0] and y[1]); ll is the combined
			 * 64-bit image written to the register.
			 */
			union {
				double_type	x[2];
				uint32_t	y[2];
				uint64_t	ll;
			} db;

			packdouble(pfpsd, pu, &db.x[0], &db.y[1]);
			if (!(pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)) {
				/* fix register encoding (see fp_op_int64 above) */
				if ((n & 0x1) == 1)
					n = (n & 0x1e) | 0x20;
				pfpsd->fp_current_write_dreg(&db.ll, DOUBLE(n), pfpsd);
			}
			break;
		}
	case fp_op_extended:
		{
			/* 128-bit quad result: four 32-bit words / two 64-bit halves */
			union {
				extended_type	x;
				uint32_t	y[4];
				uint64_t	ll[2];
			} ex;
			unpacked	U;	/* re-rounded intermediate operand */
			int		k;

			/*
			 * Implement extended rounding precision mode: the
			 * result may first have to be rounded to single or
			 * double precision before being packed as a quad.
			 */
			switch (pfpsd->fp_precision) {
			case fp_single:
				{
					single_type tx;

					/* round-trip through single precision */
					packsingle(pfpsd, pu, &tx);
					pu = &U;
					unpacksingle(pfpsd, pu, tx);
					break;
				}
			case fp_double:
				{
					double_type tx;
					uint_t ty;

					/* round-trip through double precision */
					packdouble(pfpsd, pu, &tx, &ty);
					pu = &U;
					unpackdouble(pfpsd, pu, tx, ty);
					break;
				}
			case fp_precision_3:	/* rounded to 64 bits */
				{
					/*
					 * Round the 113-bit significand down
					 * to 64 bits in place; k restores the
					 * exponent after the shift.
					 * NOTE(review): the shift amount is
					 * the constant 113-64 even though k
					 * differs in the k < 0 (subnormal)
					 * branch -- confirm this matches the
					 * intended subnormal handling.
					 */
					k = pu->exponent + EXTENDED_BIAS;
					if (k >= 0)
						k = 113-64;
					else
						k = 113-64-k;
					fpu_rightshift(pu, 113-64);
					round(pfpsd, pu);
					pu->sticky = pu->rounded = 0;
					pu->exponent += k;
					fpu_normalize(pu);
					break;
				}
			/* other precision modes pack at full width, no re-round */
			}
			packextended(pfpsd, pu, &ex.x, &ex.y[1], &ex.y[2], &ex.y[3]);
			if (!(pfpsd->fp_current_exceptions & pfpsd->fp_fsrtem)) {
				/* fix register encoding (see fp_op_int64 above) */
				if ((n & 0x1) == 1)
					n = (n & 0x1e) | 0x20;
				/* quad occupies two consecutive double registers */
				pfpsd->fp_current_write_dreg(&ex.ll[0], QUAD_E(n), pfpsd);
				pfpsd->fp_current_write_dreg(&ex.ll[1], QUAD_F(n), pfpsd);
			}
			break;
		}
	}
}
/** * @brief Perform forward DWT (periodic boundaries) on 3D data. * * @param sig Signal to be transformed. * @param res Decomposed signal. */ void dpwt3 (const Matrix <T> & sig, Matrix <T> & res) { // assign signal to result matrix res = sig; # pragma omp parallel default (shared), num_threads (_num_threads) { T * wcplo, * wcphi, * templo, * temphi, * tmp; size_t stride; int sl1 = _sl1, sl2 = _sl2, sl3 = _sl3; const int t_num = omp_get_thread_num (); // loop over levels of DWT for (int j = (_max_level-1); j >= _min_level; --j) { // update stride stride = sl1 * t_num; // update thread's temporary memory address tmp = & _temp [stride]; # pragma omp for schedule (OMP_SCHEDULE) // loop over lines along first dimension ('columns') of image for (int c2_loc = 0; c2_loc < sl2 * sl3; c2_loc++) { int c2_glob = (c2_loc / sl2) * _sl1 * _sl2 + (c2_loc % sl2) * _sl1; // access to lowpass part of DWT wcplo = & res [c2_glob /** _sl1*/]; // access to highpass part of DWT wcphi = & res [c2_glob /** _sl1*/ + sl1 / 2]; // copy part of image to _temp memory copydouble (wcplo, tmp, sl1); // apply low pass filter on current line and write to result matrix downlo (tmp, sl1, wcplo); // apply high pass filter on current line and write to result matrix downhi (tmp, sl1, wcphi); } // loop over lines along first dimension // update stride stride = 2 * sl2 * t_num; // update thread's temporary memory address tmp = & _temp [stride]; templo = & _temp [ sl2 + stride]; temphi = & _temp [1.5 * sl2 + stride]; # pragma omp for schedule (OMP_SCHEDULE) // loop over lines along second dimension ('rows') of image for (int c1_loc = 0; c1_loc < sl1 * sl3; c1_loc++) { int c1_glob = (c1_loc / sl1) * _sl1 * _sl2; // copy c1-th line of image to temp_mem unpackdouble (& res [c1_glob], sl2, _sl1, c1_loc % sl1, tmp); // apply low pass filter on current line and write to _temp mem downlo (tmp, sl2, templo); // apply high pass filter on current line and write to _temp mem downhi (tmp, sl2, temphi); // write temp 
lowpass result to result matrix packdouble (templo, sl2 / 2, _sl1, c1_loc % sl1, & res [c1_glob]); // write temp highpass result to result matrix packdouble (temphi, sl2 / 2, _sl1, c1_loc % sl1, & res [c1_glob + sl2 / 2 * _sl1]); } // loop over lines along second dimension // update stride stride = 2 * sl3 * t_num; // update thread's temporary memory address tmp = & _temp [stride]; templo = & _temp [ sl3 + stride]; temphi = & _temp [1.5 * sl3 + stride]; # pragma omp for schedule (OMP_SCHEDULE) // loop over lines along third dimension ('third') of image for (int c1_loc = 0; c1_loc < sl1 * sl2; c1_loc++) { int c1_glob = (c1_loc % sl1) + (c1_loc / sl1) * _sl1; // copy c2-th line of image to temp_mem unpackdouble (& res [c1_glob], sl3, _ld12, 0, tmp); // apply low pass filter on current line and write to _temp mem downlo (tmp, sl3, templo); // apply high pass filter on current line and write to _temp mem downhi (tmp, sl3, temphi); // write temp lowpass result to result matrix packdouble (templo, sl3 / 2, _ld12, 0, & res [c1_glob]); // write temp highpass result to result matrix packdouble (temphi, sl3 / 2, _ld12, 0, & res [c1_glob + sl3 / 2 * _ld12]); } // loop over lines along third dimension // reduce dimensions for next level sl1 /= 2; sl2 /= 2; sl3 /= 2; } // loop over levels of DWT } // omp parallel }