Beispiel #1
0
    /**
     * Processes one sample frame and writes a two-value result to outputs.
     *
     * The frame is first passed through m_decoder into m_channels_vector_double.
     * Each decoded channel value is stored at position m_index of a per-channel
     * history buffer, and the outputs are sums of dot products between
     * m_impulses_size history samples (starting at m_index) and the entries of
     * m_impulses_vector.
     *
     * NOTE(review): cblas_sdot/cblas_scopy are the single-precision BLAS routines
     * while this overload takes double pointers; the element types of the member
     * buffers are not visible here -- confirm they match.
     *
     * @param inputs   Input frame handed to m_decoder->process().
     * @param outputs  Receives two values: outputs[0] and outputs[1].
     */
    void DecoderBinaural::process(const double* inputs, double* outputs)
	{
        // Decode the incoming frame into m_channels_vector_double.
        m_decoder->process(inputs, m_channels_vector_double);

        // Step the write position backwards by one slot.
        --m_index;
        // Channel 0 is stored only in the left history buffer and contributes the same value to both outputs.
        m_channels_inputs_left[0][m_index] = m_channels_vector_double[0];
        outputs[1] = outputs[0] = cblas_sdot(m_impulses_size, m_channels_inputs_left[0]+m_index, 1, m_impulses_vector[0], 1);
        for(int i = 1; i < m_number_of_virtual_channels; i++)
        {
            // The same decoded value is stored in both the left and the right history buffer.
            m_channels_inputs_left[i][m_index] = m_channels_vector_double[i];
            m_channels_inputs_right[i][m_index] = m_channels_vector_double[i];
            // outputs[0] uses the impulse at the mirrored index (count - i); outputs[1] uses the impulse at index i.
            outputs[0] += cblas_sdot(m_impulses_size, m_channels_inputs_left[i]+m_index, 1, m_impulses_vector[m_number_of_virtual_channels - i], 1);
            outputs[1] += cblas_sdot(m_impulses_size, m_channels_inputs_right[i]+m_index, 1, m_impulses_vector[i], 1);
        }
        // Once the write position reaches the start, copy the first m_impulses_size
        // samples of every history buffer to offset m_impulses_size and restart
        // writing from m_impulses_size.
        if(m_index <= 0)
        {
            m_index = m_impulses_size;
            cblas_scopy(m_impulses_size, m_channels_inputs_left[0], 1, m_channels_inputs_left[0]+m_impulses_size, 1);
            for(int i = 1; i < m_number_of_virtual_channels; i++)
            {
                cblas_scopy(m_impulses_size, m_channels_inputs_left[i], 1, m_channels_inputs_left[i]+m_impulses_size, 1);
                cblas_scopy(m_impulses_size, m_channels_inputs_right[i], 1, m_channels_inputs_right[i]+m_impulses_size, 1);
            }
        }
    }
Beispiel #2
0
/*
 * Preprocesses one panel of panelSz points, each a row of D floats in X.
 *
 *   XX  (out) entries XX[i*panelSz + j] = X[i] . X[j] for j < i; other entries untouched
 *   Z   (out) panelSz rows of D floats, Z[i] = lamda * (X[i] - sum_{j<i} XX[i,j] * Z[j])
 *   B   (out) D floats, started as lamda*Y[0]*X[0] and then updated per point with
 *             B += lamda * (Y[i] - X[i] . B) * X[i]
 */
void preprocess1(int panelSz, int D, float *XX, float*X, float*Y, float lamda, float*Z, float*B){
    int row, col;

    /* Step 1: pairwise dot products for every pair with col < row. */
    for (row = panelSz - 1; row > 0; row--) {
        for (col = row - 1; col >= 0; col--) {
            XX[row*panelSz + col] = cblas_sdot(D, &X[row*D], 1, &X[col*D], 1);
        }
    }

    /* Step 2a: first point -- Z[0] = lamda * X[0]. */
    cblas_scopy(D, X, 1, Z, 1);
    cblas_sscal(D, lamda, Z, 1);

    /* Step 2b: B = (lamda * Y[0]) * X[0]. */
    {
        float firstScale = lamda*Y[0];
        cblas_scopy(D, X, 1, B, 1);
        cblas_sscal(D, firstScale, B, 1);
    }

    /* Step 2c: remaining points, in order, since each Z row uses the earlier ones. */
    for (row = 1; row < panelSz; row++) {
        float *zRow = &Z[row*D];
        float *xRow = &X[row*D];
        float step;

        cblas_scopy(D, xRow, 1, zRow, 1);
        for (col = 0; col < row; col++) {
            cblas_saxpy(D, -XX[row*panelSz + col], &Z[col*D], 1, zRow, 1);
        }
        cblas_sscal(D, lamda, zRow, 1);

        /* B = B + lamda * (Y[row] - X[row] . B) * X[row] */
        step = lamda*(Y[row]-cblas_sdot(D, xRow, 1, B, 1));
        cblas_saxpy(D, step, xRow, 1, B, 1);
    }
}
void replace_nans_avgs(int vec_blocksize, type_precision* vec,
                       int rows , int cols, list<long int>* ar_nan_idxs)
{
    type_precision* ones = new type_precision[rows];

    for (int i = 0; i < rows; i++)
    {
        ones[i] = 1.0;
    }

    for (int i = 0; i < vec_blocksize * cols; i++)
    {
        type_precision sum = cblas_sdot(rows, ones, 1, &vec[i * rows], 1);
        type_precision avg = sum / (rows - ar_nan_idxs[i].size());
        for (list<long int>::iterator it = ar_nan_idxs[i].begin();
             it != ar_nan_idxs[i].end();
             it++)
        {
            int idx = i * rows + (*it);
            vec[idx] = avg;
        }
    }

    delete []ones;
}
Beispiel #4
0
/*
 * Strided dot product of x and y over n elements.
 *
 * When BLAS is enabled for the active real type and n and both increments fit
 * in an int, the call is forwarded to cblas_ddot / cblas_sdot. Otherwise a
 * plain loop accumulates x[i*incx] * y[i*incy].
 */
real THBlas_(dot)(long n, real *x, long incx, real *y, long incy)
{
  /* With a single element the strides are forced to 1. */
  if(n == 1)
  {
    incx = 1;
    incy = 1;
  }

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  /* The CBLAS interface takes int arguments; use it only when nothing is truncated. */
  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
  {
    int blas_n = (int)n;
    int blas_incx = (int)incx;
    int blas_incy = (int)incy;

#if defined(TH_REAL_IS_DOUBLE)
    return cblas_ddot(blas_n, x, blas_incx, y, blas_incy);
#else
    return cblas_sdot(blas_n, x, blas_incx, y, blas_incy);
#endif
  }
#endif
  {
    /* Generic fallback. */
    real acc = 0;
    long k;
    for(k = 0; k < n; k++)
    {
      acc += x[k*incx]*y[k*incy];
    }
    return acc;
  }
}
Beispiel #5
0
/* Presents one sparse example v with label `target` to network n and
 * updates the weights with a single stochastic gradient step on a
 * squared hinge loss. Returns without touching the weights when
 * target * output > 1.
 */
void train(nnet_t* n, sparse_t* v, int target){
    int k;

    /* Forward pass: a1 = b1 + sum over non-zeros of x[k] * W1[idx[k]] */
    cblas_scopy(n->hidden, n->b1, 1, n->a1, 1);
    k = 0;
    while(k < v->nz){
        cblas_saxpy(n->hidden, v->x[k], n->W1[v->idx[k]], 1, n->a1, 1);
        k++;
    }
    activation(n->a1, n->x1, n->g1, n->hidden);

    /* Output unit: a2 = b2 + W2 . x1 */
    n->a2 = n->b2 + cblas_sdot(n->hidden, n->W2, 1, n->x1, 1);
    activation(&n->a2, &n->x2, &n->g2, 1);

    /* Margin already satisfied: no error, nothing to backpropagate. */
    if(target*n->x2 > 1){
        return;
    }

    /* Backward pass. d1 is built from W2 before W2 itself is updated. */
    n->d2 = (target-n->x2)*n->g2;
    cblas_scopy(n->hidden, n->W2, 1, n->d1, 1);
    for(k=0; k<n->hidden; k++){
        n->d1[k] *= n->d2*n->g1[k];
    }

    /* Output layer and hidden bias updates. */
    n->b2 += n->eta*n->d2;
    cblas_saxpy(n->hidden, n->eta*n->d2, n->x1, 1, n->W2, 1);
    cblas_saxpy(n->hidden, n->eta, n->d1, 1, n->b1, 1);

    /* Only the rows of W1 selected by the example's non-zero entries
     * receive a gradient, so only those rows are updated.
     */
    for(k=0; k<v->nz; k++){
        cblas_saxpy(n->hidden, n->eta*v->x[k], n->d1, 1, n->W1[v->idx[k]], 1);
    }
}
Beispiel #6
0
/*
 * Strided dot product of x and y over `size` elements.
 *
 * With USE_CBLAS enabled and size and both strides below INT_MAX the call is
 * forwarded to cblas_ddot (USE_DOUBLE) or cblas_sdot; otherwise a plain loop
 * accumulates x[i*xStride] * y[i*yStride].
 */
real THBlas_dot(long size, real *x, long xStride, real *y, long yStride)
{
  /* With a single element the strides are forced to 1. */
  if(size == 1)
  {
    xStride = 1;
    yStride = 1;
  }

#if USE_CBLAS
  if( (size < INT_MAX) && (xStride < INT_MAX) && (yStride < INT_MAX) )
  {
#ifdef USE_DOUBLE
    return cblas_ddot(size, x, xStride, y, yStride);
#else
    return cblas_sdot(size, x, xStride, y, yStride);
#endif
  }
#endif
  {
    /* Generic fallback. */
    real acc = 0;
    long k;
    for(k = 0; k < size; k++)
    {
      acc += x[k*xStride]*y[k*yStride];
    }
    return acc;
  }
}
Beispiel #7
0
value_t<A> dot(const A& a, const B& b) {
    if constexpr (all_dma<A, B>) {
        a.ensure_cpu_up_to_date();
        b.ensure_cpu_up_to_date();

        if constexpr (all_single_precision<A, B>) {
            return cblas_sdot(etl::size(a), a.memory_start(), 1, b.memory_start(), 1);
        } else {
            return cblas_ddot(etl::size(a), a.memory_start(), 1, b.memory_start(), 1);
/*
 * JNI entry point: single-precision dot product over two direct buffers.
 * offsetX / offsetY are element offsets added to each buffer's base address
 * before the vectors are handed to cblas_sdot with the given increments.
 * NOTE(review): the addresses returned by GetDirectBufferAddress are used
 * without a NULL check.
 */
JNIEXPORT jfloat JNICALL Java_uncomplicate_neanderthal_CBLAS_sdot
(JNIEnv *env, jclass clazz, jint N,
 jobject X, jint offsetX, jint incX,
 jobject Y, jint offsetY, jint incY) {

    float *baseX = (float *) (*env)->GetDirectBufferAddress(env, X);
    float *baseY = (float *) (*env)->GetDirectBufferAddress(env, Y);

    float *startX = baseX + offsetX;
    float *startY = baseY + offsetY;

    return cblas_sdot(N, startX, incX, startY, incY);
};
Beispiel #9
0
 /**
  * Writes two values to outputs: the dot products of the channel inputs with
  * m_channels_abscissa_float and m_channels_ordinate_float, each divided by
  * the plain sum of the inputs. Both outputs are 0 when that sum is zero.
  *
  * @param inputs   m_number_of_channels input samples.
  * @param outputs  Receives outputs[0] (abscissa) and outputs[1] (ordinate).
  */
 void Vector::processVelocity(const float* inputs, float* outputs)
 {
     // Plain sum of all channel values; used as the normalisation term.
     float total = 0.f;
     for(int ch = 0; ch < m_number_of_channels; ++ch)
     {
         total += inputs[ch];
     }

     const float weightedAbscissa = cblas_sdot(m_number_of_channels, inputs, 1, m_channels_abscissa_float, 1);
     const float weightedOrdinate = cblas_sdot(m_number_of_channels, inputs, 1, m_channels_ordinate_float, 1);

     // Avoid dividing by zero: a zero sum yields a zero vector.
     if(total == 0.f)
     {
         outputs[0] = 0.f;
         outputs[1] = 0.f;
         return;
     }

     outputs[0] = weightedAbscissa / total;
     outputs[1] = weightedOrdinate / total;
 }
// Dot product of this vector with x. x must be a HostVector<float> of the
// same size; both conditions are checked with assert only.
float HostVector<float>::Dot(const BaseVector<float> &x) const {

  assert(&x != NULL);

  // The raw storage is only reachable through the concrete host type.
  const HostVector<float> *other = dynamic_cast<const HostVector<float>*> (&x);
  assert(other != NULL);
  assert(this->size_ == other->size_);

  const float product = cblas_sdot(this->size_, this->vec_, 1, other->vec_, 1);
  return product;

}
Beispiel #11
0
 /*
  * Class:     com_intel_analytics_bigdl_mkl_MKL
  * Method:    vsdot
  * Signature: (I[FII[FII)F
  *
  * Single-precision dot product of n elements taken from x (starting at
  * element xOffset, stride incx) and y (starting at element yOffset,
  * stride incy).
  *
  * Returns 0 if either array cannot be pinned; in that case the JVM has an
  * exception pending. Both arrays are only read, so they are released with
  * JNI_ABORT to skip the copy-back a mode of 0 would request.
  */
JNIEXPORT float JNICALL Java_com_intel_analytics_bigdl_mkl_MKL_vsdot
   (JNIEnv * env, jclass cls, jint n, jfloatArray x, jint xOffset, jint incx,
   jfloatArray y, jint yOffset, jint incy) {
   float res = 0.0f;
   jfloat * jni_x = NULL;
   jfloat * jni_y = NULL;

   /* The third argument is an optional jboolean* "isCopy" out-parameter. */
   jni_x = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, x, NULL);
   if (jni_x == NULL) {
     return res;
   }

   jni_y = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, y, NULL);
   if (jni_y != NULL) {
     res = cblas_sdot(n, jni_x + xOffset, incx, jni_y + yOffset, incy);
     (*env)->ReleasePrimitiveArrayCritical(env, y, jni_y, JNI_ABORT);
   }

   (*env)->ReleasePrimitiveArrayCritical(env, x, jni_x, JNI_ABORT);
   return res;
 }
Beispiel #12
0
  static void dotprod(const RView& X, const RView& Y, 
		      T& result)
  {
#ifdef MKL
    const float * x = X.data();
    const float * y = Y.data();
    result = cblas_sdot(DIM_N, x, 1, y, 1);
#else
    for (int i=0; i<DIM_N; ++i)
      result += X[i] * Y[i];
#endif
  }
Beispiel #13
0
/* Runs the forward pass of network n on the sparse input v and
 * returns the resulting output n->x2. */
float value(nnet_t* n, sparse_t* v){
    int k = 0;

    /* Hidden pre-activation: a1 = b1 + sum over non-zeros of x[k] * W1[idx[k]] */
    cblas_scopy(n->hidden, n->b1, 1, n->a1, 1);
    while(k < v->nz){
        cblas_saxpy(n->hidden, v->x[k], n->W1[v->idx[k]], 1, n->a1, 1);
        k++;
    }
    activation(n->a1, n->x1, n->g1, n->hidden);

    /* Output pre-activation: a2 = b2 + W2 . x1 */
    n->a2 = n->b2;
    n->a2 += cblas_sdot(n->hidden, n->W2, 1, n->x1, 1);
    activation(&n->a2, &n->x2, &n->g2, 1);

    return n->x2;
}
Beispiel #14
0
/*
 * Single-precision dot product of x and y, written to *rho.
 * Dispatches to the CBLAS entry point when BLIS_ENABLE_CBLAS_INTERFACES is
 * defined and to the Fortran-77 entry point otherwise. The conj argument is
 * not referenced in this body.
 */
void bli_sdot( conj_t conj, int n, float* x, int incx, float* y, int incy, float* rho )
{
	float dot_value;

#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	dot_value = cblas_sdot( n, x, incx, y, incy );
#else
	/* The Fortran interface takes its scalar arguments by address. */
	dot_value = F77_sdot( &n, x, &incx, y, &incy );
#endif

	*rho = dot_value;
}
Beispiel #15
0
/*
 * JNI entry point: unit-stride dot product of N floats taken from jX starting
 * at element startX and from jY starting at element startY.
 *
 * Returns 0 if either array cannot be pinned; in that case the JVM has an
 * exception pending. Both arrays are only read, so they are released with
 * JNI_ABORT to skip the copy-back a mode of 0 would request.
 */
JNIEXPORT jfloat JNICALL Java_edu_berkeley_bid_CBLAS_sdotxx 
(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jX, jint startX, jfloatArray jY, jint startY){
	jfloat returnValue = 0.0f;
	jfloat * X = NULL;
	jfloat * Y = NULL;

	/* The third argument is an optional jboolean* "isCopy" out-parameter. */
	X = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, jX, NULL);
	if (X == NULL) {
		return returnValue;
	}

	Y = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, jY, NULL);
	if (Y != NULL) {
		returnValue = cblas_sdot(N, X+startX, 1, Y+startY, 1);
		(*env)->ReleasePrimitiveArrayCritical(env, jY, Y, JNI_ABORT);
	}

	(*env)->ReleasePrimitiveArrayCritical(env, jX, X, JNI_ABORT);
	return returnValue;
}
Beispiel #16
0
value_t<A> sum(const A& a) {
    if constexpr (is_dma<A>) {
        etl::dyn_vector<value_t<A>> ones(etl::size(a));
        ones = value_t<A>(1);

        a.ensure_cpu_up_to_date();

        [[maybe_unused]] const auto* m_a = a.memory_start();
        [[maybe_unused]] const auto* m_b = ones.memory_start();

        if constexpr (is_single_precision<A>) {
            return cblas_sdot(etl::size(a), m_a, 1, m_b, 1);
        } else if constexpr (is_double_precision<A>) {
Beispiel #17
0
 /**
  * Writes two values to outputs: the dot products of the squared channel
  * inputs with m_channels_abscissa_float and m_channels_ordinate_float, each
  * divided by the sum of the squared inputs. Both outputs are 0 when that sum
  * is zero. m_channels_float is overwritten with the squared inputs.
  *
  * @param inputs   m_number_of_channels input samples.
  * @param outputs  Receives outputs[0] (abscissa) and outputs[1] (ordinate).
  */
 void Vector::processEnergy(const float* inputs, float* outputs)
 {
     // Square every channel value in the scratch buffer.
     cblas_scopy(m_number_of_channels, inputs, 1, m_channels_float, 1);
     for(int ch = 0; ch < m_number_of_channels; ++ch)
     {
         m_channels_float[ch] *= m_channels_float[ch];
     }

     // Sum of absolute values of the squares, i.e. the normalisation term.
     const float total = cblas_sasum(m_number_of_channels, m_channels_float, 1);
     const float weightedAbscissa = cblas_sdot(m_number_of_channels, m_channels_float, 1, m_channels_abscissa_float, 1);
     const float weightedOrdinate = cblas_sdot(m_number_of_channels, m_channels_float, 1, m_channels_ordinate_float, 1);

     // Avoid dividing by zero: a zero sum yields a zero vector.
     if(total == 0.f)
     {
         outputs[0] = 0.f;
         outputs[1] = 0.f;
         return;
     }

     outputs[0] = weightedAbscissa / total;
     outputs[1] = weightedOrdinate / total;
 }
Beispiel #18
0
 // Single-precision dispatch: forwards the packed parameters to cblas_sdot
 // and returns its result.
 inline float
 dot_call( dot_params< float >& p )
 {
     return cblas_sdot( p.n, p.x, p.inc_x, p.y, p.inc_y );
 }
Beispiel #19
0
/*
 * Float dot-product hook. Strides arrive in bytes; when both are
 * non-negative whole multiples of sizeof(float) the work is handed to
 * cblas_sdot with element strides, otherwise the previously registered
 * implementation in oldFunctions[PyArray_FLOAT] is called.
 */
static void
FLOAT_dot(void *a, intp stridea, void *b, intp strideb, void *res,
          intp n, void *tmp)
{
    /* Byte strides converted to element strides. */
    int na = stridea / sizeof(float);
    int nb = strideb / sizeof(float);

    /* Converting back must reproduce the byte stride exactly. */
    int usable = (sizeof(float) * na == stridea) &&
                 (sizeof(float) * nb == strideb) &&
                 (na >= 0) && (nb >= 0);

    if (!usable) {
        oldFunctions[PyArray_FLOAT](a, stridea, b, strideb, res, n, tmp);
        return;
    }

    *((float *)res) = cblas_sdot((int)n, (float *)a, na, (float *)b, nb);
}
Beispiel #20
0
/*
 * JNI entry point: column-wise dot products. For each of the ncols columns,
 * Z[i] receives the unit-stride dot product of nrows floats starting at
 * X + i*ldx and Y + i*ldy.
 *
 * If any array cannot be pinned, the function returns without computing
 * anything; the JVM has an exception pending in that case. X and Y are only
 * read and are released with JNI_ABORT; Z is released with mode 0 so the
 * results are written back.
 */
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_sdotm
(JNIEnv * env, jobject calling_obj, jint nrows, jint ncols, jfloatArray jX, jint ldx, jfloatArray jY, jint ldy, jfloatArray jZ){
	jfloat * X = NULL;
	jfloat * Y = NULL;
	jfloat * Z = NULL;
	int i;

	/* Pin the arrays one after another; stop at the first failure. */
	X = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, jX, NULL);
	if (X != NULL) {
		Y = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, jY, NULL);
	}
	if (Y != NULL) {
		Z = (jfloat *) (*env)->GetPrimitiveArrayCritical(env, jZ, NULL);
	}

	if (Z != NULL) {
		for (i = 0; i < ncols; i++) {
			Z[i] = cblas_sdot(nrows, X+i*ldx, 1, Y+i*ldy, 1);
		}
	}

	/* Release in reverse order of acquisition, only what was obtained. */
	if (Z != NULL) {
		(*env)->ReleasePrimitiveArrayCritical(env, jZ, Z, 0);
	}
	if (Y != NULL) {
		(*env)->ReleasePrimitiveArrayCritical(env, jY, Y, JNI_ABORT);
	}
	if (X != NULL) {
		(*env)->ReleasePrimitiveArrayCritical(env, jX, X, JNI_ABORT);
	}
}
Beispiel #21
0
// Computes the unit-stride inner product of x and y into `inner`.
// Returns false (leaving `inner` untouched) when the sizes differ or a
// reference compares equal to NULL; returns true otherwise.
bool dot(REAL &inner, const RealVector &x, const RealVector &y) {
  const UINT count = x.size;
  const INT strideX = 1;
  const INT strideY = 1;

  if (NULL == &x || NULL == &y) {
    return false;
  }
  if (x.size != y.size) {
    return false;
  }

  inner = cblas_sdot(count, x.M, strideX, y.M, strideY);
  return true;
}
Beispiel #22
0
/*
 * Applies a sequence of four BLAS operations to the n-by-n matrix A (lower
 * triangle, leading dimension lda) using the vector V and the scalar *TAU:
 *
 *   1. work = tau * A * V                    (ssymv)
 *   2. d    = work' * V
 *   3. work = work - (1/2 * d * tau) * V     (saxpy)
 *   4. A    = A - work*V' - V*work'          (ssyr2)
 *
 * work is a caller-supplied float workspace of dimension n.
 */
inline static void
magma_slarfxsym_v2(magma_int_t n, 
                float *A, magma_int_t lda, 
                float *V, float *TAU, 
                float *work) 
{
    magma_int_t unit_stride = 1;
    float zero      = MAGMA_S_ZERO;
    float minus_one = MAGMA_S_NEG_ONE;
    float half      = MAGMA_S_HALF;
    float xtv;

    /* Step 1: work = tau * A * V */
    blasf77_ssymv("L", &n, TAU, A, &lda, V, &unit_stride, &zero, work, &unit_stride);

    /* Step 2: xtv = work' * V */
#if defined(PRECISION_z) || defined(PRECISION_c)
    xtv = zero;
    for (magma_int_t k = 0; k < n; k++)
        xtv = xtv + MAGMA_S_CNJG(work[k]) * V[k];
#else
    xtv = cblas_sdot(n, work, unit_stride, V, unit_stride);
#endif

    /* Step 3: scale to -1/2 * xtv * tau, then work = work + xtv * V */
    xtv = -xtv * half * (*TAU);
    blasf77_saxpy(&n, &xtv, V, &unit_stride, work, &unit_stride);

    /* Step 4: symmetric rank-2 update A := -work*V' - V*work' + A */
    blasf77_ssyr2("L", &n, &minus_one, work, &unit_stride, V, &unit_stride, A, &lda);

}
Beispiel #23
0
// Dot product of x and y over x.coefs.size() entries, computed with MKL's
// cblas_ddot or cblas_sdot depending on the configured scalar type. With
// HAVE_MPI the local value is summed across MPI_COMM_WORLD and the global
// sum is returned; otherwise the local value is returned.
typename TypeTraits<typename Vector::ScalarType>::magnitude_type
  dot(const Vector& x,
      const Vector& y)
{
  typedef typename Vector::ScalarType Scalar;
  typedef typename TypeTraits<typename Vector::ScalarType>::magnitude_type magnitude;

  int length = x.coefs.size();

#ifdef MINIFE_DEBUG
  // In debug builds, report a too-short y and clamp the length to it.
  if (y.local_size < length) {
    std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<<std::endl;
    length = y.local_size;
  }
#endif

  const Scalar* xdata = &x.coefs[0];
  const Scalar* ydata = &y.coefs[0];

#if defined(MINIFE_MKL_DOUBLE)
  magnitude local_result = cblas_ddot((MKL_INT) length, xdata, (MKL_INT) 1, ydata, (MKL_INT) 1);
#elif defined(MINIFE_MKL_FLOAT)
  magnitude local_result = cblas_sdot((MKL_INT) length, xdata, (MKL_INT) 1, ydata, (MKL_INT) 1);
#else
  #error Unknown MINIFE_SCALAR type.
#endif

#ifdef HAVE_MPI
  // Combine the per-rank partial results.
  magnitude global_result = 0;
  MPI_Datatype mpi_dtype = TypeTraits<magnitude>::mpi_type();
  MPI_Allreduce(&local_result, &global_result, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD);
  return global_result;
#else
  return local_result;
#endif
}
Beispiel #24
0
//#define NUMERIC_PartialDeriv_CALC
// Assembles the fit matrices for `fit`. The active branch simply delegates to
// fit->BuildMatrix(accum) and ignores `debug`. The disabled branch walks the
// instruction list by hand, summing dot products into each destination and
// optionally printing every entry.
void BuildMatrix(BkgFitMatrixPacker *fit,bool accum, bool debug)
{
#if 1
    (void) debug;
    fit->BuildMatrix(accum);
#else
    mat_assembly_instruction *instr = fit->instList;
    int printedOnLine = 0;

    // Build the JTJ and RHS matrices, one instruction per destination entry.
    for (int n = 0; n < fit->nInstr; ++n, ++instr)
    {
        double total = 0.0;
        for (int k = 0; k < instr->cnt; ++k)
        {
            total += cblas_sdot(instr->si[k].len, instr->si[k].src1, 1, instr->si[k].src2, 1);
        }

        if (accum)
        {
            *(instr->dst) += total;
        }
        else
        {
            *(instr->dst) = total;
        }

        if (!debug)
        {
            continue;
        }

        char *firstName = findName(instr->si[0].src1);
        char *secondName = findName(instr->si[0].src2);

        printf("%d(%s--%s)(%lf) ",n,firstName,secondName,*(instr->dst));

        // Wrap the debug output after a handful of entries.
        if (printedOnLine++ > 6)
        {
            printf("\n  ");
            printedOnLine = 0;
        }
    }
#endif
}
Beispiel #25
0
/*
 * Checks the CBLAS dot-product routines against fixed expected values.
 *
 * Every case uses N = 1 and calls gsl_test_rel with a relative tolerance of
 * flteps for single-precision routines and dbleps for double-precision ones.
 * Cases 1-9 cover cblas_sdsdot with alpha = 0, 0.1 and 1 for three stride
 * sign combinations. Cases 10-27 run sdot, ddot, cdotu, cdotc, zdotu and
 * zdotc once for each of those stride combinations. The complex routines
 * return their result through a two-element array whose entries are checked
 * as real and imaginary parts.
 */
void
test_dot (void) {
    const double flteps = 1e-4, dbleps = 1e-6;
    /* Cases 1-3: sdsdot, incX = 1, incY = -1. */
    {
        int N = 1;
        float alpha = 0.0f;
        float X[] = { 0.733f };
        float Y[] = { 0.825f };
        int incX = 1;
        int incY = -1;
        float expected = 0.604725f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 1)");
    };


    {
        int N = 1;
        float alpha = 0.1f;
        float X[] = { 0.733f };
        float Y[] = { 0.825f };
        int incX = 1;
        int incY = -1;
        float expected = 0.704725f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 2)");
    };


    {
        int N = 1;
        float alpha = 1.0f;
        float X[] = { 0.733f };
        float Y[] = { 0.825f };
        int incX = 1;
        int incY = -1;
        float expected = 1.604725f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 3)");
    };


    /* Cases 4-6: sdsdot, incX = -1, incY = 1. */
    {
        int N = 1;
        float alpha = 0.0f;
        float X[] = { -0.812f };
        float Y[] = { -0.667f };
        int incX = -1;
        int incY = 1;
        float expected = 0.541604f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 4)");
    };


    {
        int N = 1;
        float alpha = 0.1f;
        float X[] = { -0.812f };
        float Y[] = { -0.667f };
        int incX = -1;
        int incY = 1;
        float expected = 0.641604f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 5)");
    };


    {
        int N = 1;
        float alpha = 1.0f;
        float X[] = { -0.812f };
        float Y[] = { -0.667f };
        int incX = -1;
        int incY = 1;
        float expected = 1.541604f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 6)");
    };


    /* Cases 7-9: sdsdot, incX = -1, incY = -1. */
    {
        int N = 1;
        float alpha = 0.0f;
        float X[] = { 0.481f };
        float Y[] = { 0.523f };
        int incX = -1;
        int incY = -1;
        float expected = 0.251563f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 7)");
    };


    {
        int N = 1;
        float alpha = 0.1f;
        float X[] = { 0.481f };
        float Y[] = { 0.523f };
        int incX = -1;
        int incY = -1;
        float expected = 0.351563f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 8)");
    };


    {
        int N = 1;
        float alpha = 1.0f;
        float X[] = { 0.481f };
        float Y[] = { 0.523f };
        int incX = -1;
        int incY = -1;
        float expected = 1.251563f;
        float f;
        f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdsdot(case 9)");
    };


    /* Cases 10-15: all six dot variants, incX = 1, incY = -1. */
    {
        int N = 1;
        float X[] = { 0.785f };
        float Y[] = { -0.7f };
        int incX = 1;
        int incY = -1;
        float expected = -0.5495f;
        float f;
        f = cblas_sdot(N, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdot(case 10)");
    };


    {
        int N = 1;
        double X[] = { 0.79 };
        double Y[] = { -0.679 };
        int incX = 1;
        int incY = -1;
        double expected = -0.53641;
        double f;
        f = cblas_ddot(N, X, incX, Y, incY);
        gsl_test_rel(f, expected, dbleps, "ddot(case 11)");
    };


    {
        int N = 1;
        float X[] = { 0.474f, -0.27f };
        float Y[] = { -0.144f, -0.392f };
        int incX = 1;
        int incY = -1;
        float expected[2] = {-0.174096f, -0.146928f};
        float f[2];
        cblas_cdotu_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 12) real");
        gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 12) imag");
    };


    {
        int N = 1;
        float X[] = { 0.474f, -0.27f };
        float Y[] = { -0.144f, -0.392f };
        int incX = 1;
        int incY = -1;
        float expected[2] = {0.037584f, -0.224688f};
        float f[2];
        cblas_cdotc_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 13) real");
        gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 13) imag");
    };


    {
        int N = 1;
        double X[] = { -0.87, -0.631 };
        double Y[] = { -0.7, -0.224 };
        int incX = 1;
        int incY = -1;
        double expected[2] = {0.467656, 0.63658};
        double f[2];
        cblas_zdotu_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 14) real");
        gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 14) imag");
    };


    {
        int N = 1;
        double X[] = { -0.87, -0.631 };
        double Y[] = { -0.7, -0.224 };
        int incX = 1;
        int incY = -1;
        double expected[2] = {0.750344, -0.24682};
        double f[2];
        cblas_zdotc_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 15) real");
        gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 15) imag");
    };


    /* Cases 16-21: all six dot variants, incX = -1, incY = 1. */
    {
        int N = 1;
        float X[] = { -0.457f };
        float Y[] = { 0.839f };
        int incX = -1;
        int incY = 1;
        float expected = -0.383423f;
        float f;
        f = cblas_sdot(N, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdot(case 16)");
    };


    {
        int N = 1;
        double X[] = { 0.949 };
        double Y[] = { -0.873 };
        int incX = -1;
        int incY = 1;
        double expected = -0.828477;
        double f;
        f = cblas_ddot(N, X, incX, Y, incY);
        gsl_test_rel(f, expected, dbleps, "ddot(case 17)");
    };


    {
        int N = 1;
        float X[] = { 0.852f, -0.045f };
        float Y[] = { 0.626f, -0.164f };
        int incX = -1;
        int incY = 1;
        float expected[2] = {0.525972f, -0.167898f};
        float f[2];
        cblas_cdotu_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 18) real");
        gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 18) imag");
    };


    {
        int N = 1;
        float X[] = { 0.852f, -0.045f };
        float Y[] = { 0.626f, -0.164f };
        int incX = -1;
        int incY = 1;
        float expected[2] = {0.540732f, -0.111558f};
        float f[2];
        cblas_cdotc_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 19) real");
        gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 19) imag");
    };


    {
        int N = 1;
        double X[] = { -0.786, -0.341 };
        double Y[] = { -0.271, -0.896 };
        int incX = -1;
        int incY = 1;
        double expected[2] = {-0.09253, 0.796667};
        double f[2];
        cblas_zdotu_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 20) real");
        gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 20) imag");
    };


    {
        int N = 1;
        double X[] = { -0.786, -0.341 };
        double Y[] = { -0.271, -0.896 };
        int incX = -1;
        int incY = 1;
        double expected[2] = {0.518542, 0.611845};
        double f[2];
        cblas_zdotc_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 21) real");
        gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 21) imag");
    };


    /* Cases 22-27: all six dot variants, incX = -1, incY = -1. */
    {
        int N = 1;
        float X[] = { -0.088f };
        float Y[] = { -0.165f };
        int incX = -1;
        int incY = -1;
        float expected = 0.01452f;
        float f;
        f = cblas_sdot(N, X, incX, Y, incY);
        gsl_test_rel(f, expected, flteps, "sdot(case 22)");
    };


    {
        int N = 1;
        double X[] = { -0.434 };
        double Y[] = { -0.402 };
        int incX = -1;
        int incY = -1;
        double expected = 0.174468;
        double f;
        f = cblas_ddot(N, X, incX, Y, incY);
        gsl_test_rel(f, expected, dbleps, "ddot(case 23)");
    };


    {
        int N = 1;
        float X[] = { -0.347f, 0.899f };
        float Y[] = { -0.113f, -0.858f };
        int incX = -1;
        int incY = -1;
        float expected[2] = {0.810553f, 0.196139f};
        float f[2];
        cblas_cdotu_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 24) real");
        gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 24) imag");
    };


    {
        int N = 1;
        float X[] = { -0.347f, 0.899f };
        float Y[] = { -0.113f, -0.858f };
        int incX = -1;
        int incY = -1;
        float expected[2] = {-0.732131f, 0.399313f};
        float f[2];
        cblas_cdotc_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 25) real");
        gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 25) imag");
    };


    {
        int N = 1;
        double X[] = { -0.897, -0.204 };
        double Y[] = { -0.759, 0.557 };
        int incX = -1;
        int incY = -1;
        double expected[2] = {0.794451, -0.344793};
        double f[2];
        cblas_zdotu_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 26) real");
        gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 26) imag");
    };


    {
        int N = 1;
        double X[] = { -0.897, -0.204 };
        double Y[] = { -0.759, 0.557 };
        int incX = -1;
        int incY = -1;
        double expected[2] = {0.567195, -0.654465};
        double f[2];
        cblas_zdotc_sub(N, X, incX, Y, incY, &f);
        gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 27) real");
        gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 27) imag");
    };


}
Beispiel #26
0
// Single-precision strided dot product: forwards directly to cblas_sdot.
float caffe_cpu_strided_dot<float>(const int n, const float* x, const int incx,
                                   const float* y, const int incy) {
    const float result = cblas_sdot(n, x, incx, y, incy);
    return result;
}
Beispiel #27
0
/* Fortran-style sdot_ shim: scalar arguments arrive by pointer and are
 * dereferenced before the call is forwarded to cblas_sdot. */
static inline ffloat sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy)
{
  const int count = *n;
  const int stride_x = *incx;
  const int stride_y = *incy;

  return cblas_sdot(count, x, stride_x, y, stride_y);
}
Beispiel #28
0
 // Thin pass-through to cblas_sdot with an identical parameter list.
 float wrapper_cblas_sdot(const int N, const float  *X, const int incX, const float  *Y, const int incY)
   {
   const float dot = cblas_sdot(N, X, incX, Y, incY);
   return dot;
   }
Beispiel #29
0
/*
 * Master-side driver of the MPI ISTA run.
 *
 * Reads the run parameters from parameterFile, gathers each slave's row count,
 * loads the start vector and the right-hand side, creates the ISTA instance,
 * takes part in the collective centering and scaling of the features, solves
 * for every lambda on the path, undoes the rescaling, writes the results and
 * finally tells every slave to terminate.
 *
 * Failure handling:
 *  - If the row-count buffer cannot be allocated, the job is aborted with
 *    MPI_Abort, because the collective gather cannot be entered without it.
 *  - If any working buffer cannot be allocated, the condition is reported
 *    through the same error broadcast used for file-open errors, so the
 *    function returns instead of dereferencing a NULL buffer.
 *  - The early error return releases everything allocated so far, including
 *    the row-count buffer.
 *
 * nslaves        number of slave ranks (ranks 1..nslaves)
 * parameterFile  path of the parameter file passed to getMasterParams
 */
static void master(int nslaves, char* parameterFile)
{
  //VARIABLE DECLARATIONS
  time_t startTime, computationStartTime, endTime;
  int rank, i, j, accel, MAX_ITER, *slave_ldAs, total_ldA, rdA, numLambdas, error;
  ISTAinstance_mpi* instance;
  float *xvalue, *result, *b, *lambdas, lambdaStart, lambdaFinish, gamma, step, MIN_FUNCDIFF;
  char regType, xfilename[MAX_FILENAME_SIZE], bfilename[MAX_FILENAME_SIZE], outfilename[MAX_FILENAME_SIZE];

  //START TIMER
  startTime = time(NULL);

  //GET VALUES FROM PARAMETER FILE
  getMasterParams(parameterFile, xfilename, bfilename, outfilename, &rdA, 
		  &numLambdas, &lambdaStart, &lambdaFinish, &gamma, &step, &regType, &accel, 
		  &MAX_ITER, &MIN_FUNCDIFF);

  //STORE EACH SLAVE'S INDIVIDUAL LDA AND CALCULATE TOTAL_LDA
  slave_ldAs = (int*)malloc((nslaves+1)*sizeof(int));
  if(slave_ldAs == NULL) {
    // Without a receive buffer the gather below cannot be entered.
    fprintf(stdout,"Unable to allocate memory!");
    MPI_Abort(MPI_COMM_WORLD, 1);
    return;
  }
  // The master contributes 0 rows of its own.
  int my_ldA = 0;
  MPI_Gather(&my_ldA, 1, MPI_INT, slave_ldAs, 1, MPI_INT, 0, MPI_COMM_WORLD);
  total_ldA = 0;
  for(i=0; i<=nslaves; i++)
    total_ldA += slave_ldAs[i];
  fprintf(stdout, "TOTAL LDA IS %d\n", total_ldA);


  //ALLOCATE MEMORY
  xvalue = calloc(rdA+1,sizeof(float));
  result = malloc((total_ldA+rdA)*sizeof(float));
  b      = malloc((total_ldA)*sizeof(float));
  lambdas = malloc(numLambdas*sizeof(float));

  //ASSIGN VALUES TO XVALUE AND B
  // error follows the existing convention: 1 means "ok", 0 means "failed".
  error=1;
  if(xvalue==NULL || result==NULL || b==NULL || lambdas==NULL) {
    fprintf(stdout,"Unable to allocate memory!");
    // Report the failure through the error broadcast below instead of
    // continuing with NULL buffers.
    error=0;
  }
  else {
    if(strcmp(xfilename, "zeros")==0){
      //do nothing - calloc already initialized xvalue to 0
    }
    else 
      error *= getVector(xvalue, rdA, xfilename);
    error *= getVector(b, total_ldA, bfilename);
  }

  //CHECK FOR FILEOPEN ERRORS; IF ANY PRESENT END PROGRAM
  // A slave signals a file-open error by reporting a row count of -1.
  for(i=1; i<=nslaves; i++)
    if(slave_ldAs[i] == -1) error=0;
  MPI_Bcast(&error, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if(error==0) {
    // free(NULL) is a no-op, so this is safe after a failed allocation too.
    free(result);
    free(xvalue);
    free(b);
    free(lambdas);
    free(slave_ldAs);
    return;
  }
  
  //PRINT INPUTS
  /*  fprintf(stdout, "Here's x:\n");
  for(i=0; i < rdA; i++)
    {
      fprintf(stdout, "%f ", xvalue[i]);
    }
  fprintf(stdout, "\n and here's b:\n");
  for(i=0; i < total_ldA; i++)
    {
      fprintf(stdout, "%f ", b[i]);
    }
  */

  //CREATE ISTA OBJECT
  instance = ISTAinstance_mpi_new(slave_ldAs, total_ldA, rdA, b, lambdaStart, gamma, 
				  accel, regType, xvalue, step,
				  nslaves, MPI_COMM_WORLD,
				  TAG_AX, TAG_ATX, TAG_ATAX, TAG_DIE);
  
  //CENTER FEATURES
  // The master contributes an all-zero vector to the sum of the slaves' shifts.
  float* shifts = calloc(rdA, sizeof(float));
  MPI_Reduce(shifts, instance->meanShifts, rdA, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
  cblas_sscal(rdA, 1.0 / total_ldA, instance->meanShifts, 1); 
  
  MPI_Bcast(instance->meanShifts, rdA, MPI_FLOAT, 0, MPI_COMM_WORLD);

  //SCALE FEATURES
  // Same pattern: sum the slaves' contributions, then take the square root.
  float* norms = calloc(rdA, sizeof(float));
  MPI_Reduce(norms, instance->scalingFactors, rdA, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
  for(j=0; j<rdA; j++)
    instance->scalingFactors[j] = pow(instance->scalingFactors[j], 0.5);

  MPI_Bcast(instance->scalingFactors, rdA, MPI_FLOAT, 0, MPI_COMM_WORLD); 

  //CREATE LAMBDA PATH
  calcLambdas(lambdas, numLambdas, lambdaStart, lambdaFinish, instance);

  //DEBUGGING AREA
  /*float* ones = calloc(rdA+1, sizeof(float));
  for(i=0; i< rdA+1; i++) {
    ones[i] = 1.0;
  }
  fprintf(stdout, "meanshifts: ");
  for(i=0; i<5; i++) {
    fprintf(stdout, "%f ", instance->meanShifts[i]);
  }
  fprintf(stdout, "\nscalingFactors: ");
  for(i=0; i<5; i++) {
    fprintf(stdout, "%f ", instance->scalingFactors[i]);
  }
  fprintf(stdout, "\nlambdas: ");
  for(i=0; i<5; i++) {
    fprintf(stdout, "%f ", lambdas[i]);
  }
  fprintf(stdout, "\nA * ones: ");
  multiply_Ax(ones, result, instance);
  for(i=0; i<5; i++) {
    fprintf(stdout, "%f ", result[i]);
  }
  fprintf(stdout, "\n");
  */
  
  //TIME UPDATE
  computationStartTime = time(NULL);

  //RUN ISTA
  // One solve per lambda on the path, in order.
  for(j=0; j < numLambdas; j++) {
    instance->lambda = lambdas[j];

    ISTAsolve_lite(instance, MAX_ITER, MIN_FUNCDIFF);
    fprintf(stdout, "\n");
  }

  //UNDO RESCALING
  // Coefficients whose scaling factor is at most 0.0001 are left unchanged.
  for(i=0; i<(instance->rdA); i++) {
    if(instance->scalingFactors[i] > 0.0001)
      instance->xcurrent[i] = instance->xcurrent[i] / instance->scalingFactors[i];
  }
  instance->intercept = -1.0 * cblas_sdot(instance->rdA, instance->xcurrent, 1, instance->meanShifts, 1);

  //WRITE RESULTS
  writeResults(instance, outfilename, bfilename, lambdas[numLambdas-1]);

  //STOP TIME
  endTime = time(NULL);
  fprintf(stdout,"Setup took %f seconds and computation took %f seconds\n",
	  difftime(computationStartTime, startTime), difftime(endTime, computationStartTime));

  //CLOSE THE SLAVE PROCESSES AND FREE MEMORY
  fprintf(stdout, "Closing the program\n");
  for(rank=1; rank <= nslaves; rank++)
    {
      // Empty message; only the TAG_DIE tag matters.
      MPI_Send(NULL, 0, MPI_INT, rank, TAG_DIE, MPI_COMM_WORLD);
    }

  // NOTE(review): xvalue, b and slave_ldAs were handed to ISTAinstance_mpi_new
  // and are not freed here -- presumably ISTAinstance_mpi_free releases them;
  // confirm, otherwise they leak.
  free(result); 
  ISTAinstance_mpi_free(instance); 
  free(shifts); 
  free(norms); 
  free(lambdas);
  return;
}
// Single-precision dot product of two contiguous vectors of length n.
float caffe_cpu_dot<float>(const int n, const float* x, const float* y) {
  const int unit_stride = 1;
  return cblas_sdot(n, x, unit_stride, y, unit_stride);
}