/* Generic subtraction dispatcher: res = op1 - op2, interpreted according to
   the element type of ring.  Unsupported types abort via NOT_IMPLEMENTED. */
void elem_sub(elem_ptr res, elem_srcptr op1, elem_srcptr op2, const ring_t ring)
{
    switch (ring->type)
    {
        case TYPE_FMPZ:
            fmpz_sub(res, op1, op2);
            break;

        case TYPE_LIMB:
            /* single machine word; wraps modulo the word size */
            *((mp_ptr) res) = *((mp_srcptr) op1) - *((mp_srcptr) op2);
            break;

        case TYPE_POLY:
            elem_poly_sub(res, op1, op2, ring);
            break;

        case TYPE_MOD:
            /* modular ring: dispatch on the representation of the parent ring */
            if (RING_PARENT(ring)->type == TYPE_LIMB)
            {
                *((mp_ptr) res) = n_submod(*((mp_srcptr) op1), *((mp_srcptr) op2), ring->nmod.n);
            }
            else if (RING_PARENT(ring)->type == TYPE_FMPZ)
            {
                /* subtract, then normalise into [0, modulus) */
                fmpz_sub(res, op1, op2);
                if (fmpz_sgn(res) < 0)
                    fmpz_add(res, res, RING_MODULUS(ring));
            }
            else
            {
                NOT_IMPLEMENTED("sub (mod)", ring);
            }
            break;

        case TYPE_FRAC:
            elem_frac_sub(res, op1, op2, ring);
            break;

        case TYPE_COMPLEX:
            /* componentwise over the parent ring */
            elem_sub(REALPART(res, ring), REALPART(op1, ring), REALPART(op2, ring), ring->parent);
            elem_sub(IMAGPART(res, ring), IMAGPART(op1, ring), IMAGPART(op2, ring), ring->parent);
            break;

        default:
            NOT_IMPLEMENTED("sub", ring);
    }
}
// One backward timestep for the softmax layer: the error handed to the
// layer input is (output activation - m_outputErrs) over m_numNeuron units.
// NOTE(review): with softmax + cross-entropy this is the usual
// (prediction - target) gradient — presumably m_outputErrs[seqIdx] holds
// the targets; confirm against the caller that fills it.
void RNNSoftmaxLayer::backwardStep(int seqIdx) {
	elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron);
}
/* Coefficient-wise polynomial subtraction: res = poly1 - poly2, where the
   inputs have len1 and len2 coefficients and res has room for
   max(len1, len2).  Handles the three ranges separately: the overlap is
   subtracted, the tail of the longer poly1 is copied, and the tail of the
   longer poly2 is negated.  res may alias poly1. */
void _elem_poly_sub(elem_ptr res, elem_srcptr poly1, long len1, elem_srcptr poly2, long len2, const ring_t ring)
{
    long i, min;
    long size = ring->size;

    /* NOTE(review): removed the original `if (ring->type == TYPE_FMPZ && 0)`
       block — the `&& 0` made its _fmpz_poly_sub fast path unreachable dead
       code, so deleting it does not change behavior.  Re-enable by calling
       _fmpz_poly_sub here if the fast path is ever wanted. */

    min = FLINT_MIN(len1, len2);

    /* overlapping coefficients: element-wise subtraction */
    for (i = 0; i < min; i++)
        elem_sub(INDEX(res, i, size), SRC_INDEX(poly1, i, size), SRC_INDEX(poly2, i, size), ring);

    /* tail of poly1: plain copy (skipped when res aliases poly1 — the
       coefficients are already in place) */
    if (poly1 != res)
        for (i = min; i < len1; i++)
            elem_set(INDEX(res, i, size), SRC_INDEX(poly1, i, size), ring);

    /* tail of poly2: negated into res */
    for (i = min; i < len2; i++)
        elem_neg(INDEX(res, i, size), SRC_INDEX(poly2, i, size), ring);
}
/* Solve a linear system using a precomputed fraction-free LU decomposition.
   X receives the solution columns; perm is the row permutation produced by
   the FFLU factorisation; FFLU holds the packed L/U factors (accessed via
   the LU macro); B is the right-hand side.  X and B are n x m, so each of
   the m columns is solved independently.  A scratch element T (in the
   parent/element ring ering) holds intermediate products.
   NOTE(review): XX and LU are project macros — presumably XX(i,j) indexes
   X and LU(i,j) indexes FFLU; confirm against their definitions. */
void elem_mat_solve_fflu_precomp(elem_mat_t X, const long * perm, const elem_mat_t FFLU, const elem_mat_t B, const ring_t ring)
{
    elem_ptr T;
    long i, j, k, m, n;
    const ring_struct * ering = RING_PARENT(ring);

    n = X->r;
    m = X->c;

    ELEM_TMP_INIT(T, ering);

    /* start from the permuted right-hand side: X = P * B */
    elem_mat_set_perm(X, perm, B, ring);

    /* solve column k of the system */
    for (k = 0; k < m; k++)
    {
        /* Fraction-free forward substitution */
        for (i = 0; i < n - 1; i++)
        {
            for (j = i + 1; j < n; j++)
            {
                /* XX(j,k) = XX(j,k)*LU(i,i) - LU(j,i)*XX(i,k), keeping all
                   arithmetic in the ring (no fractions introduced) */
                elem_mul(XX(j, k), XX(j, k), LU(i, i), ering);
                elem_mul(T, LU(j, i), XX(i, k), ering);
                elem_sub(XX(j, k), XX(j, k), T, ering);
                if (i > 0)
                {
                    /* divide out the previous pivot — exact by the
                       fraction-free (Bareiss-style) invariant */
                    elem_divexact(XX(j, k), XX(j, k), LU(i-1, i-1), ering);
                }
            }
        }

        /* Fraction-free back substitution */
        for (i = n - 2; i >= 0; i--)
        {
            /* scale by the final pivot before eliminating the entries above */
            elem_mul(XX(i, k), XX(i, k), LU(n-1, n-1), ering);
            for (j = i + 1; j < n; j++)
            {
                elem_mul(T, XX(j, k), LU(i, j), ering);
                elem_sub(XX(i, k), XX(i, k), T, ering);
            }
            /* exact division by this row's pivot */
            elem_divexact(XX(i, k), XX(i, k), LU(i, i), ering);
        }
    }

    ELEM_TMP_CLEAR(T, ering);
}
/* In-place fused multiply-subtract over a vector:
   res[i] = res[i] - vec[i]*c for i in [0, len). */
void _elem_vec_scalar_submul(elem_ptr res, elem_srcptr vec, long len, elem_srcptr c, const ring_t ring)
{
    long idx;
    long step = ring->size;
    elem_ptr prod;

    /* one scratch element reused across the whole loop */
    ELEM_TMP_INIT(prod, ring);

    for (idx = 0; idx < len; idx++)
    {
        /* prod = vec[idx] * c; res[idx] -= prod */
        elem_mul(prod, SRC_INDEX(vec, idx, step), c, ring);
        elem_sub(INDEX(res, idx, step), SRC_INDEX(res, idx, step), prod, ring);
    }

    ELEM_TMP_CLEAR(prod, ring);
}
// Backward pass over the whole sequence (timesteps 1..inputSeqLen,
// inclusive; index 0 is not touched): for each step the input error is
// (output activation - m_outputErrs) over m_numNeuron units.
void RNN_MSELayer::feedBackward(int inputSeqLen) {
	int step = 1;
	while (step <= inputSeqLen) {
		elem_sub(m_inputErrs[step], m_outputActs[step], m_outputErrs[step], m_numNeuron);
		++step;
	}
}
void RNNMSELayer::feedBackward(int inputSeqLen) { #pragma omp parallel for for (int seqIdx=1; seqIdx<=inputSeqLen; ++seqIdx) { elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron); } }
// Single-timestep backward step:
// m_inputErrs[seqIdx] = m_outputActs[seqIdx] - m_outputErrs[seqIdx],
// elementwise over m_numNeuron units (the MSE gradient shape).
void RNNMSELayer::backwardStep(int seqIdx) {
	elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron);
}
/* "Dummy" split of the elements in s by target process: every element is
   classified with the key-to-class callback k2c, local elements are
   compacted in place at the front of s, and foreign elements are copied
   into per-process regions of the auxiliary buffer sx.  Per-target counts
   are accumulated in send_stats (a stack array is used when the caller
   passes NULL).  Returns 0 on success, -1 on NULL arguments, -2 when sx is
   too small.
   NOTE(review): comm is accepted but never used and no MPI calls are made
   here — presumably this variant only measures/stages the split (hence
   "dummy"); confirm against the non-dummy mpi_splitk. */
slint mpi_splitk_dummy(elements_t *s, k2c_func k2c, void *ci, elements_t *sx, slint *send_stats, int size, int rank, MPI_Comm comm) /* sl_proto, sl_func mpi_splitk_dummy */
{
  slint i, j, k, t;
  /* NOTE(review): these are C99 VLAs sized by the communicator size */
  slint local_sb_counts[size];
  slint _send_stats[size];
  elements_t sb[size], sb_current[size];
  elements_t src, dst, end;

  if (s == NULL || ci == NULL || sx == NULL) return -1;

  /* need send_buffers with at least one element per foreign process */
  if (sx->size < size - 1) return -2;

  rti_tstart(rti_tid_mpi_splitk_dummy);

  rti_tstart(rti_tid_mpi_splitk_dummy_init);

  if (send_stats == NULL) send_stats = _send_stats;

  /* initials: carve sx into one send_buffer region per foreign process,
     distributing the remaining j elements as evenly as possible over the
     remaining k processes */
  j = sx->size;
  k = size - 1;
  for (i = 0; i < size; ++i)
  {
    /* init the local send_buffer counters */
    local_sb_counts[i] = 0;

    /* prepare the send_buffers */
    if (i != rank)
    {
      elem_assign_at(sx, sx->size - j, &sb[i]);
      /* ceil(j / k) elements for this process */
      sb[i].size = (j / k) + (j % k != 0);
      j -= sb[i].size;
      --k;
    } else elem_null(&sb[i]);

    /* sb_current[i] is the write cursor into region sb[i] */
    elem_assign(&sb[i], &sb_current[i]);

    send_stats[i] = 0;
  }

  /* src scans s, dst is the in-place compaction cursor for local elements,
     end marks one past the last element of s */
  elem_assign(s, &src);
  elem_assign(s, &dst);
  elem_assign_at(s, s->size, &end);

  rti_tstop(rti_tid_mpi_splitk_dummy_init);

  rti_tstart(rti_tid_mpi_splitk_dummy_loop);

  /* NOTE(review): the outer loop body ends in an unconditional break, so it
     executes exactly once here — presumably the real (non-dummy) variant
     sends full buffers and iterates; confirm before refactoring it away */
  while (1)
  {
    /* distribute the elements to the send_buffer, as long as possible (elements left and target send_buffer not full) */
    while (src.keys != end.keys)
    {
      /* compute the target-process of the current element */
      t = (k2c)(src.keys, src.keys - s->keys, ci);

      ++send_stats[t];

#ifndef K2C_ONLY
      /* is the local process the target? */
      if (t == rank)
      {
        /* if necessary, move the element on the local process */
        if (src.keys != dst.keys) elem_copy(&src, &dst);

        /* update the dst-position */
        elem_inc(&dst);

      } else /* the target is another process (need to send the element) */
      {
        /* break, if the according send_buffer is full */
        if (local_sb_counts[t] >= sb[t].size) break;

        /* copy the element to the according send_buffer */
        elem_copy(&src, &sb_current[t]);
        elem_inc(&sb_current[t]);
        ++local_sb_counts[t];

        /* buffer t just became full: rewind its cursor to the region start
           and reset the counter (elem_sub here is the sl-library pointer
           decrement by local_sb_counts[t] elements — the staged data is
           simply overwritten, consistent with the "dummy" behavior) */
        if (local_sb_counts[t] >= sb[t].size) { elem_sub(&sb_current[t], local_sb_counts[t]); local_sb_counts[t] = 0; }
      }
#endif

      /* update the src-position */
      elem_inc(&src);
    }

    break;
  }

  rti_tstop(rti_tid_mpi_splitk_dummy_loop);

  rti_tstop(rti_tid_mpi_splitk_dummy);

  return 0;
}