Example no. 1
void
elem_sub(elem_ptr res, elem_srcptr op1, elem_srcptr op2, const ring_t ring)
{
    switch (ring->type)
    {
        case TYPE_FMPZ:
            fmpz_sub(res, op1, op2);
            break;

        case TYPE_LIMB:
            *((mp_ptr) res) = *((mp_srcptr) op1) - *((mp_srcptr) op2);
            break;

        case TYPE_POLY:
            elem_poly_sub(res, op1, op2, ring);
            break;

        case TYPE_MOD:
            {
                switch (RING_PARENT(ring)->type)
                {
                    case TYPE_LIMB:
                        *((mp_ptr) res) = n_submod(*((mp_srcptr) op1), *((mp_srcptr) op2), ring->nmod.n);
                        break;

                    case TYPE_FMPZ:
                        fmpz_sub(res, op1, op2);
                        if (fmpz_sgn(res) < 0)
                            fmpz_add(res, res, RING_MODULUS(ring));
                        break;

                    default:
                        NOT_IMPLEMENTED("sub (mod)", ring);
                }
            }
            break;

        case TYPE_FRAC:
            elem_frac_sub(res, op1, op2, ring);
            break;

        case TYPE_COMPLEX:
            elem_sub(REALPART(res, ring), REALPART(op1, ring), REALPART(op2, ring), ring->parent);
            elem_sub(IMAGPART(res, ring), IMAGPART(op1, ring), IMAGPART(op2, ring), ring->parent);
            break;

        default:
            NOT_IMPLEMENTED("sub", ring);
    }
}
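
The dispatcher above switches on a runtime type tag and forwards to the appropriate subtraction routine; the TYPE_MOD and TYPE_COMPLEX cases recurse into the parent ring. Below is a minimal self-contained sketch of the same pattern in plain C, using made-up number_t/num_sub names rather than the library's actual types:

/* Illustrative only: a tagged value type with a type-switched subtraction,
   mirroring the dispatch style of elem_sub above. All names are hypothetical,
   and NUM_MOD assumes operands already reduced to [0, mod). */
#include <stdint.h>
#include <stdio.h>

typedef enum { NUM_INT, NUM_DOUBLE, NUM_MOD } num_type;

typedef struct {
    num_type type;
    union { int64_t i; double d; } v;
    uint64_t mod;                  /* modulus, used only when type == NUM_MOD */
} number_t;

static void num_sub(number_t *res, const number_t *a, const number_t *b)
{
    res->type = a->type;
    switch (a->type)
    {
        case NUM_INT:
            res->v.i = a->v.i - b->v.i;
            break;

        case NUM_DOUBLE:
            res->v.d = a->v.d - b->v.d;
            break;

        case NUM_MOD:
            res->mod = a->mod;
            res->v.i = (int64_t) (((uint64_t) a->v.i + a->mod - (uint64_t) b->v.i) % a->mod);
            break;
    }
}

int main(void)
{
    number_t a = { NUM_MOD, { .i = 2 }, 7 }, b = { NUM_MOD, { .i = 5 }, 7 }, r;
    num_sub(&r, &a, &b);
    printf("2 - 5 mod 7 = %lld\n", (long long) r.v.i);   /* prints 4 */
    return 0;
}
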
Example no. 2
void RNNSoftmaxLayer::backwardStep(int seqIdx) {
	elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron);
	// for (int i=0; i<m_numNeuron; ++i) {
	// 	printf("%f=%f-%f\t", m_inputErrs[seqIdx][i], m_outputActs[seqIdx][i], m_outputErrs[seqIdx][i]);
	// }
	// printf("\n");
}
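
In examples 2, 6, 7 and 8, elem_sub is a different routine: the call sites suggest an elementwise subtraction of two arrays of length m_numNeuron, i.e. inputErr = outputActs - outputErrs, which matches the usual softmax/MSE gradient activation - target under the assumption that m_outputErrs holds the targets at this point. A sketch of such a routine, with the (dst, a, b, n) signature inferred from the calls and float chosen arbitrarily for the element type:

/* Elementwise subtraction, as apparently used by the RNN layers above. */
void elem_sub(float *dst, const float *a, const float *b, int n)
{
    for (int i = 0; i < n; ++i)
        dst[i] = a[i] - b[i];      /* e.g. inputErr[i] = outputAct[i] - outputErr[i] */
}
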
Example no. 3
void
_elem_poly_sub(elem_ptr res, elem_srcptr poly1, long len1,
    elem_srcptr poly2, long len2, const ring_t ring)
{
    long i, min;
    long size = ring->size;

    if (ring->type == TYPE_FMPZ && 0)   /* fmpz fast path, deliberately disabled by the "&& 0" */
    {
        _fmpz_poly_sub(res, poly1, len1, poly2, len2);
        return;
    }

    min = FLINT_MIN(len1, len2);

    for (i = 0; i < min; i++)
        elem_sub(INDEX(res, i, size), SRC_INDEX(poly1, i, size), SRC_INDEX(poly2, i, size), ring);

    if (poly1 != res)
        for (i = min; i < len1; i++)
            elem_set(INDEX(res, i, size), SRC_INDEX(poly1, i, size), ring);

    for (i = min; i < len2; i++)
        elem_neg(INDEX(res, i, size), SRC_INDEX(poly2, i, size), ring);
}
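
_elem_poly_sub treats the three coefficient ranges separately: indices below min(len1, len2) are subtracted, the tail of poly1 is copied (skipped when res aliases poly1, since those values are already in place), and the tail of poly2 is negated. The same structure over plain long coefficients, as a self-contained sketch:

/* Coefficient arrays c1[0..len1) and c2[0..len2); res must have room for
   max(len1, len2) entries. Mirrors the min/copy/negate structure above. */
static void poly_sub_long(long *res, const long *c1, long len1,
                          const long *c2, long len2)
{
    long i, min = (len1 < len2) ? len1 : len2;

    for (i = 0; i < min; i++)            /* overlapping coefficients */
        res[i] = c1[i] - c2[i];

    if (c1 != res)                       /* tail of the first polynomial */
        for (i = min; i < len1; i++)
            res[i] = c1[i];

    for (i = min; i < len2; i++)         /* tail of the second polynomial, negated */
        res[i] = -c2[i];
}
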
Example no. 4
void
elem_mat_solve_fflu_precomp(elem_mat_t X,
                    const long * perm,
                    const elem_mat_t FFLU, const elem_mat_t B, const ring_t ring)
{
    elem_ptr T;
    long i, j, k, m, n;
    const ring_struct * ering = RING_PARENT(ring);

    n = X->r;
    m = X->c;

    ELEM_TMP_INIT(T, ering);
    elem_mat_set_perm(X, perm, B, ring);

    for (k = 0; k < m; k++)
    {
        /* Fraction-free forward substitution */
        for (i = 0; i < n - 1; i++)
        {
            for (j = i + 1; j < n; j++)
            {
                elem_mul(XX(j, k), XX(j, k), LU(i, i), ering);
                elem_mul(T, LU(j, i), XX(i, k), ering);
                elem_sub(XX(j, k), XX(j, k), T, ering);
                if (i > 0)
                {
                    elem_divexact(XX(j, k), XX(j, k), LU(i-1, i-1), ering);
                }
            }
        }

        /* Fraction-free back substitution */
        for (i = n - 2; i >= 0; i--)
        {
            elem_mul(XX(i, k), XX(i, k), LU(n-1, n-1), ering);
            for (j = i + 1; j < n; j++)
            {
                elem_mul(T, XX(j, k), LU(i, j), ering);
                elem_sub(XX(i, k), XX(i, k), T, ering);
            }
            elem_divexact(XX(i, k), XX(i, k), LU(i, i), ering);
        }
    }

    ELEM_TMP_CLEAR(T, ering);
}
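
The solver above is fraction-free: each step multiplies through by a pivot and then divides exactly by an earlier pivot, so all intermediate values stay in the base ring, and the computed X solves the system only up to the common denominator given by the final pivot (the determinant). The 2-by-2 integer case shows the principle in isolation (a sketch of the idea, not the library routine): adj(A)*b is an integer vector satisfying A*(adj(A)*b) = det(A)*b.

/* Fraction-free solve of a 2x2 integer system: computes x, y, den such that
   [a b; c d] * (x, y) == den * (b0, b1), with den = det(A) and all
   arithmetic staying in the integers. */
#include <stdio.h>

static void solve_2x2_fraction_free(long a, long b, long c, long d,
                                    long b0, long b1,
                                    long *x, long *y, long *den)
{
    *den = a * d - b * c;       /* det(A), the common denominator */
    *x   = d * b0 - b * b1;     /* first component of adj(A) * rhs  */
    *y   = a * b1 - c * b0;     /* second component of adj(A) * rhs */
}

int main(void)
{
    long x, y, den;
    /* A = [2 3; 1 4], b = (7, 9): den = 5, scaled solution (1, 11),
       i.e. the exact solution is (1/5, 11/5) */
    solve_2x2_fraction_free(2, 3, 1, 4, 7, 9, &x, &y, &den);
    printf("x = %ld/%ld, y = %ld/%ld\n", x, den, y, den);
    return 0;
}
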
void
_elem_vec_scalar_submul(elem_ptr res, elem_srcptr vec, long len, elem_srcptr c, const ring_t ring)
{
    long i, size = ring->size;
    elem_ptr t;

    ELEM_TMP_INIT(t, ring);

    for (i = 0; i < len; i++)
    {
        elem_mul(t, SRC_INDEX(vec, i, size), c, ring);
        elem_sub(INDEX(res, i, size), SRC_INDEX(res, i, size), t, ring);
    }

    ELEM_TMP_CLEAR(t, ring);
}
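
_elem_vec_scalar_submul subtracts c times a vector from res in place, reusing a single temporary for each product. Over plain doubles (an illustration of the operation, not the library code) this amounts to:

/* res[i] -= vec[i] * c for i in [0, len); the local t mirrors the
   ELEM_TMP_INIT/ELEM_TMP_CLEAR scratch element used above. */
static void vec_scalar_submul(double *res, const double *vec, long len, double c)
{
    for (long i = 0; i < len; i++)
    {
        double t = vec[i] * c;
        res[i] -= t;
    }
}
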
Example no. 6
void RNN_MSELayer::feedBackward(int inputSeqLen) {
	for (int seqIdx=1; seqIdx<=inputSeqLen; ++seqIdx) {
		elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron);
	}
}
Example no. 7
void RNNMSELayer::feedBackward(int inputSeqLen) {
	#pragma omp parallel for
	for (int seqIdx=1; seqIdx<=inputSeqLen; ++seqIdx) {
		elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron);
	}
}
Example no. 8
void RNNMSELayer::backwardStep(int seqIdx) {
	elem_sub(m_inputErrs[seqIdx], m_outputActs[seqIdx], m_outputErrs[seqIdx], m_numNeuron);
}
slint mpi_splitk_dummy(elements_t *s, k2c_func k2c, void *ci, elements_t *sx, slint *send_stats, int size, int rank, MPI_Comm comm) /* sl_proto, sl_func mpi_splitk_dummy */
{
  slint i, j, k, t;

  slint local_sb_counts[size];
  
  slint _send_stats[size];

  elements_t sb[size], sb_current[size];
  elements_t src, dst, end;

  
  if (s == NULL || ci == NULL || sx == NULL) return -1;

  /* need send_buffers with at least one element per foreign process */
  if (sx->size < size - 1) return -2;

  rti_tstart(rti_tid_mpi_splitk_dummy);
  rti_tstart(rti_tid_mpi_splitk_dummy_init);

  if (send_stats == NULL) send_stats = _send_stats;

  /* initialisation */
  j = sx->size;
  k = size - 1;
  for (i = 0; i < size; ++i)
  {
    /* init the local send_buffer counters */
    local_sb_counts[i] = 0;

    /* prepare the send_buffers */
    if (i != rank)
    {
      elem_assign_at(sx, sx->size - j, &sb[i]);
      sb[i].size = (j / k) + (j % k != 0);
      j -= sb[i].size;
      --k;

    } else elem_null(&sb[i]);
    elem_assign(&sb[i], &sb_current[i]);
    
    send_stats[i] = 0;
  }

  elem_assign(s, &src);
  elem_assign(s, &dst);
  elem_assign_at(s, s->size, &end);

  rti_tstop(rti_tid_mpi_splitk_dummy_init);
  rti_tstart(rti_tid_mpi_splitk_dummy_loop);

  while (1)
  {
    /* distribute the elements to the send_buffers as long as possible (elements left and the target send_buffer not full) */
    while (src.keys != end.keys)
    {
      /* compute the target-process of the current element */
      t = (k2c)(src.keys, src.keys - s->keys, ci);

      ++send_stats[t];

#ifndef K2C_ONLY

      /* is the local process the target? */
      if (t == rank)
      {
        /* if necessary, move the element on the local process */
        if (src.keys != dst.keys) elem_copy(&src, &dst);

        /* update the dst-position */
        elem_inc(&dst);

      } else /* the target is another process (need to send the element) */
      {
        /* break if the corresponding send_buffer is full */
        if (local_sb_counts[t] >= sb[t].size) break;

        /* copy the element to the corresponding send_buffer */
        elem_copy(&src, &sb_current[t]);
        elem_inc(&sb_current[t]);

        ++local_sb_counts[t];
        
        if (local_sb_counts[t] >= sb[t].size)
        {
          /* send_buffer full: rewind sb_current[t] to the buffer start and
             reset the counter (later elements for t overwrite the buffer) */
          elem_sub(&sb_current[t], local_sb_counts[t]);
          local_sb_counts[t] = 0;
        }
      }

#endif
      
      /* update the src-position */
      elem_inc(&src);
    }

    break;
  }

  rti_tstop(rti_tid_mpi_splitk_dummy_loop);
  rti_tstop(rti_tid_mpi_splitk_dummy);
  
  return 0;
}
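
In the initialisation loop above, the remaining j elements of the scratch array sx are split over the remaining k foreign processes with (j / k) + (j % k != 0), a ceiling division, so earlier processes may receive one element more and every foreign process receives at least one element provided sx->size >= size - 1 (which the early return enforces). A standalone sketch of that partitioning scheme:

/* Split `total` buffer slots over `nparts` parts the way the init loop above
   does: each part gets the ceiling of (remaining slots / remaining parts). */
#include <stdio.h>

static void split_slots(long total, int nparts, long *sizes)
{
    long j = total;
    int  k = nparts;

    for (int i = 0; i < nparts; ++i)
    {
        sizes[i] = (j / k) + (j % k != 0);   /* ceiling division of the rest */
        j -= sizes[i];
        --k;
    }
}

int main(void)
{
    long sizes[3];
    split_slots(7, 3, sizes);                /* yields 3, 2, 2 */
    printf("%ld %ld %ld\n", sizes[0], sizes[1], sizes[2]);
    return 0;
}
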