/** * @brief Computes the contribution to the FSR scalar flux from a segment. * @details This method integrates the angular flux for a Track segment across * energy groups and polar angles, and tallies it into the FSR scalar * flux, and updates the Track's angular flux. * @param curr_segment a pointer to the Track segment of interest * @param azim_index a pointer to the azimuthal angle index for this segment * @param track_flux a pointer to the Track's angular flux * @param fsr_flux a pointer to the temporary FSR flux buffer */ void VectorizedSolver::tallyScalarFlux(segment* curr_segment, int azim_index, FP_PRECISION* track_flux, FP_PRECISION* fsr_flux) { int tid = omp_get_thread_num(); int fsr_id = curr_segment->_region_id; FP_PRECISION* delta_psi = &_delta_psi[tid*_num_groups]; FP_PRECISION* exponentials = &_thread_exponentials[tid*_polar_times_groups]; computeExponentials(curr_segment, exponentials); /* Set the FSR scalar flux buffer to zero */ memset(fsr_flux, 0.0, _num_groups * sizeof(FP_PRECISION)); /* Tally the flux contribution from segment to FSR's scalar flux */ /* Loop over polar angles */ for (int p=0; p < _num_polar; p++) { /* Loop over each energy group vector length */ for (int v=0; v < _num_vector_lengths; v++) { /* Loop over energy groups within this vector */ #pragma simd vectorlength(VEC_LENGTH) for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) delta_psi[e] = track_flux(p,e) - _reduced_sources(fsr_id,e); /* Loop over energy groups within this vector */ #pragma simd vectorlength(VEC_LENGTH) for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) delta_psi[e] *= exponentials(p,e); /* Loop over energy groups within this vector */ #pragma simd vectorlength(VEC_LENGTH) for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) fsr_flux[e] += delta_psi[e] * _polar_weights(azim_index,p); /* Loop over energy groups within this vector */ #pragma simd vectorlength(VEC_LENGTH) for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) track_flux(p,e) -= delta_psi[e]; } } 
/* Atomically increment the FSR scalar flux from the temporary array */ omp_set_lock(&_FSR_locks[fsr_id]); { #ifdef SINGLE vsAdd(_num_groups, &_scalar_flux(fsr_id,0), fsr_flux, &_scalar_flux(fsr_id,0)); #else vdAdd(_num_groups, &_scalar_flux(fsr_id,0), fsr_flux, &_scalar_flux(fsr_id,0)); #endif } omp_unset_lock(&_FSR_locks[fsr_id]); }
/*
 * Class:     com_intel_analytics_bigdl_mkl_MKL
 * Method:    vsAdd
 * Signature: (I[FI[FI[FI)V
 *
 * Pins the three Java float arrays, forwards to vsAdd on the offset
 * pointers (n elements of a + aOffset and b + bOffset, result written at
 * y + yOffset), then releases the arrays in reverse order of acquisition.
 *
 * If any array cannot be acquired (GetPrimitiveArrayCritical returns a null
 * pointer), nothing is computed, the arrays already acquired are released,
 * and the function returns so the JVM can surface its pending error.
 */
JNIEXPORT void JNICALL Java_com_intel_analytics_bigdl_mkl_MKL_vsAdd
  (JNIEnv * env, jclass cls, jint n, jfloatArray a, jint aOffset,
   jfloatArray b, jint bOffset, jfloatArray y, jint yOffset) {

  jfloat * jni_a = 0;
  jfloat * jni_b = 0;
  jfloat * jni_y = 0;

  /* The isCopy out-parameter is not needed, so a null pointer is passed. */
  jni_a = (*env)->GetPrimitiveArrayCritical(env, a, 0);
  if (jni_a != 0) {
    jni_b = (*env)->GetPrimitiveArrayCritical(env, b, 0);
    if (jni_b != 0) {
      jni_y = (*env)->GetPrimitiveArrayCritical(env, y, 0);
      if (jni_y != 0) {
        vsAdd(n, jni_a + aOffset, jni_b + bOffset, jni_y + yOffset);

        /* Mode 0: publish the result back to the Java array and unpin. */
        (*env)->ReleasePrimitiveArrayCritical(env, y, jni_y, 0);
      }

      /* a and b are only read, so JNI_ABORT releases them without a
       * copy-back. This also keeps a stale input copy from overwriting the
       * result when an input array is the same object as y. */
      (*env)->ReleasePrimitiveArrayCritical(env, b, jni_b, JNI_ABORT);
    }
    (*env)->ReleasePrimitiveArrayCritical(env, a, jni_a, JNI_ABORT);
  }
}
/** * @brief Computes the contribution to the FSR scalar flux from a Track segment. * @details This method integrates the angular flux for a Track segment across * energy groups and polar angles, and tallies it into the FSR scalar * flux, and updates the Track's angular flux. * @param curr_segment a pointer to the Track segment of interest * @param azim_index a pointer to the azimuthal angle index for this segment * @param track_flux a pointer to the Track's angular flux * @param fsr_flux a pointer to the temporary FSR flux buffer * @param fwd */ void VectorizedSolver::scalarFluxTally(segment* curr_segment, int azim_index, FP_PRECISION* track_flux, FP_PRECISION* fsr_flux, bool fwd){ int tid = omp_get_thread_num(); int fsr_id = curr_segment->_region_id; FP_PRECISION length = curr_segment->_length; FP_PRECISION* sigma_t = curr_segment->_material->getSigmaT(); /* The change in angular flux along this Track segment in the FSR */ FP_PRECISION delta_psi; FP_PRECISION* exponentials = &_thread_exponentials[tid*_polar_times_groups]; computeExponentials(curr_segment, exponentials); /* Set the FSR scalar flux buffer to zero */ memset(fsr_flux, 0.0, _num_groups * sizeof(FP_PRECISION)); /* Tally the flux contribution from segment to FSR's scalar flux */ /* Loop over polar angles */ for (int p=0; p < _num_polar; p++){ /* Loop over each energy group vector length */ for (int v=0; v < _num_vector_lengths; v++) { /* Loop over energy groups within this vector */ #pragma simd vectorlength(VEC_LENGTH) private(delta_psi) for (int e=v*VEC_LENGTH; e < (v+1)*VEC_LENGTH; e++) { delta_psi = (track_flux(p,e) - _reduced_source(fsr_id,e)) * exponentials(p,e); fsr_flux[e] += delta_psi * _polar_weights(azim_index,p); track_flux(p,e) -= delta_psi; } } } /* Atomically increment the FSR scalar flux from the temporary array */ omp_set_lock(&_FSR_locks[fsr_id]); { #ifdef SINGLE vsAdd(_num_groups, &_scalar_flux(fsr_id,0), fsr_flux, &_scalar_flux(fsr_id,0)); #else vdAdd(_num_groups, 
&_scalar_flux(fsr_id,0), fsr_flux, &_scalar_flux(fsr_id,0)); #endif } omp_unset_lock(&_FSR_locks[fsr_id]); return; }
// To force linking in all needed Intel routines // See compileCX for details (linking against .a) void Dummy(void) { vsAdd(); vsSub(); vsDiv(); vsSqr(); vsMul(); vsAbs(); vsInv(); vsSin(); vsCos(); vsSinCos(); vsTan(); vsAsin(); vsAcos(); vsAtan(); vsAtan2(); vsSinh(); vsCosh(); vsTanh(); vsAsinh(); vsAcosh(); vsAtanh(); vsPow(); vsPowx(); vsSqrt(); vsCbrt(); vsInvSqrt(); vsInvCbrt(); vsHypot(); vsFloor(); vsCeil(); vsRound(); vsTrunc(); vsRint(); vsNearbyInt(); vsModf(); vsExp(); vsLn(); vsLog10(); vsErf(); vsErfc(); vsErfInv(); }
/**
 * @brief Single-precision specialization of caffe_add.
 * @details Forwards directly to vsAdd with the same arguments. The
 *          `template <>` prefix is required because a definition naming
 *          caffe_add<float> is an explicit specialization of a function
 *          template declared elsewhere.
 * @param n value passed to vsAdd as the element count
 * @param a first input array
 * @param b second input array
 * @param y output array handed to vsAdd
 */
template <>
void caffe_add<float>(const int n, const float* a, const float* b,
                      float* y) {
  vsAdd(n, a, b, y);
}
/**
 * Exported single-precision vector add.
 *
 * Hands the arguments to vsAdd unchanged and in the same order; no
 * validation or other work is done here.
 *
 * @param n      value passed to vsAdd as the element count
 * @param x      first input array
 * @param y      second input array
 * @param result output array handed to vsAdd
 */
DLLEXPORT void s_vector_add(
    const int n,
    const float x[],
    const float y[],
    float result[] )
{
    vsAdd(n, x, y, result);
}