// Processes one sample frame: decodes `inputs` into the virtual channel
// signals, pushes them into per-channel history buffers and writes the two
// output samples (outputs[0] and outputs[1]) as sums of dot products between
// those histories and the impulse vectors.
//
// NOTE(review): the parameters are double but the BLAS calls are the
// single-precision cblas_sdot/cblas_scopy; the element type of the member
// buffers is not visible here -- confirm they are float.
void DecoderBinaural::process(const double* inputs, double* outputs)
{
    m_decoder->process(inputs, m_channels_vector_double);

    // The history is written backwards: the newest sample goes at m_index and
    // the dot product reads m_impulses_size samples starting from there.
    --m_index;

    // Channel 0 contributes the same value to both outputs.
    m_channels_inputs_left[0][m_index] = m_channels_vector_double[0];
    outputs[1] = outputs[0] = cblas_sdot(m_impulses_size, m_channels_inputs_left[0]+m_index, 1, m_impulses_vector[0], 1);

    for(int i = 1; i < m_number_of_virtual_channels; i++)
    {
        m_channels_inputs_left[i][m_index] = m_channels_vector_double[i];
        m_channels_inputs_right[i][m_index] = m_channels_vector_double[i];
        // outputs[0] uses the impulse vector of the mirrored channel index,
        // outputs[1] uses the impulse vector of channel i itself.
        outputs[0] += cblas_sdot(m_impulses_size, m_channels_inputs_left[i]+m_index, 1, m_impulses_vector[m_number_of_virtual_channels - i], 1);
        outputs[1] += cblas_sdot(m_impulses_size, m_channels_inputs_right[i]+m_index, 1, m_impulses_vector[i], 1);
    }

    // Once the write position reaches the start of the buffer, copy the first
    // m_impulses_size samples to the second half and restart writing from
    // m_impulses_size, so the window read by the dot products stays contiguous.
    if(m_index <= 0)
    {
        m_index = m_impulses_size;
        cblas_scopy(m_impulses_size, m_channels_inputs_left[0], 1, m_channels_inputs_left[0]+m_impulses_size, 1);
        for(int i = 1; i < m_number_of_virtual_channels; i++)
        {
            cblas_scopy(m_impulses_size, m_channels_inputs_left[i], 1, m_channels_inputs_left[i]+m_impulses_size, 1);
            cblas_scopy(m_impulses_size, m_channels_inputs_right[i], 1, m_channels_inputs_right[i]+m_impulses_size, 1);
        }
    }
}
/*
 * Pre-processes one panel of points.
 *
 *   panelSz : number of points processed in this round
 *   D       : length of each point vector
 *   XX      : panelSz x panelSz scratch; entry [i*panelSz+j] (i>j) receives X[i]'*X[j]
 *   X       : panelSz vectors of length D, stored back to back
 *   Y       : one scalar per point
 *   lamda   : scaling factor applied to every Z vector and to the B updates
 *   Z       : output, panelSz vectors of length D
 *   B       : output, one vector of length D
 */
void preprocess1(int panelSz, int D, float *XX, float*X, float*Y, float lamda, float*Z, float*B){
    int row, col;

    /* Step 1: pairwise inner products X[row]'*X[col] for row > col. */
    for (row = panelSz - 1; row > 0; row--) {
        float *x_row = &(X[row*D]);
        for (col = row - 1; col >= 0; col--) {
            XX[row*panelSz + col] = cblas_sdot(D, x_row, 1, &(X[col*D]), 1);
        }
    }

    /* Step 2: first point. Z[0] = lamda*X[0], B = lamda*Y[0]*X[0]. */
    cblas_scopy(D, X, 1, Z, 1);
    cblas_sscal(D, lamda, Z, 1);
    float first_weight = lamda*Y[0];
    cblas_scopy(D, X, 1, B, 1);
    cblas_sscal(D, first_weight, B, 1);

    /* Remaining points. */
    for (row = 1; row < panelSz; row++) {
        float *x_row = &(X[row*D]);
        float *z_row = &(Z[row*D]);

        /* Z[row] = lamda*(X[row] - sum_{col<row} XX[row,col]*Z[col]) */
        cblas_scopy(D, x_row, 1, z_row, 1);
        for (col = 0; col < row; col++) {
            cblas_saxpy(D, -1*XX[row*panelSz + col], &(Z[col*D]), 1, z_row, 1);
        }
        cblas_sscal(D, lamda, z_row, 1);

        /* B = lamda*(Y[row] - X[row]'*B)*X[row] + B */
        float weight = lamda*(Y[row] - cblas_sdot(D, x_row, 1, B, 1));
        cblas_saxpy(D, weight, x_row, 1, B, 1);
    }
}
void replace_nans_avgs(int vec_blocksize, type_precision* vec, int rows , int cols, list<long int>* ar_nan_idxs) { type_precision* ones = new type_precision[rows]; for (int i = 0; i < rows; i++) { ones[i] = 1.0; } for (int i = 0; i < vec_blocksize * cols; i++) { type_precision sum = cblas_sdot(rows, ones, 1, &vec[i * rows], 1); type_precision avg = sum / (rows - ar_nan_idxs[i].size()); for (list<long int>::iterator it = ar_nan_idxs[i].begin(); it != ar_nan_idxs[i].end(); it++) { int idx = i * rows + (*it); vec[idx] = avg; } } delete []ones; }
/*
 * Dot product of two strided vectors of n elements.
 *
 * When BLAS is enabled for float/double and n and both increments fit in an
 * int, the call is forwarded to cblas_ddot / cblas_sdot; otherwise the sum is
 * computed with a plain loop.
 */
real THBlas_(dot)(long n, real *x, long incx, real *y, long incy)
{
  long k;
  real acc = 0;

  /* With a single element the strides are irrelevant; normalise them. */
  if(n == 1)
  {
    incx = 1;
    incy = 1;
  }

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if( !((n > INT_MAX) || (incx > INT_MAX) || (incy > INT_MAX)) )
  {
#if defined(TH_REAL_IS_DOUBLE)
    return cblas_ddot((int)n, x, (int)incx, y, (int)incy);
#else
    return cblas_sdot((int)n, x, (int)incx, y, (int)incy);
#endif
  }
#endif

  /* Fallback: straightforward accumulation. */
  for(k = 0; k < n; k++)
    acc += x[k*incx]*y[k*incy];

  return acc;
}
/*
 * One stochastic-gradient training step on a single sparse example `v` with
 * label `target`. Runs the forward pass, returns early when target*output
 * exceeds 1, and otherwise updates b2, W2, b1 and the rows of W1 that
 * correspond to the non-zero inputs.
 */
void train(nnet_t* n, sparse_t* v, int target){
  int k;

  /* ---- Forward pass ---- */

  /* Hidden pre-activation: a1 = b1 + sum_k x[k] * W1[idx[k]] */
  cblas_scopy(n->hidden, n->b1, 1, n->a1, 1);
  k = 0;
  while(k < v->nz){
    cblas_saxpy(n->hidden, v->x[k], n->W1[v->idx[k]], 1, n->a1, 1);
    k++;
  }
  activation(n->a1, n->x1, n->g1, n->hidden);

  /* Output unit */
  n->a2 = n->b2 + cblas_sdot(n->hidden, n->W2, 1, n->x1, 1);
  activation(&n->a2, &n->x2, &n->g2, 1);

  /* Margin already satisfied: nothing to back-propagate. */
  if(target*n->x2 > 1)
    return;

  /* ---- Backward pass ---- */

  n->d2 = (target-n->x2)*n->g2;

  /* d1 is derived from W2 before W2 itself is updated below. */
  cblas_scopy(n->hidden, n->W2, 1, n->d1, 1);
  k = 0;
  while(k < n->hidden){
    n->d1[k] *= n->d2*n->g1[k];
    k++;
  }

  n->b2 += n->eta*n->d2;
  cblas_saxpy(n->hidden, n->eta*n->d2, n->x1, 1, n->W2, 1);
  cblas_saxpy(n->hidden, n->eta, n->d1, 1, n->b1, 1);

  /* Only the rows of W1 selected by the non-zero inputs are touched. */
  k = 0;
  while(k < v->nz){
    cblas_saxpy(n->hidden, n->eta*v->x[k], n->d1, 1, n->W1[v->idx[k]], 1);
    k++;
  }
}
/*
 * Dot product of two strided vectors of `size` elements.
 *
 * With USE_CBLAS set and size and both strides below INT_MAX the call is
 * forwarded to cblas_ddot / cblas_sdot (selected by USE_DOUBLE); otherwise the
 * sum is computed with a plain loop.
 */
real THBlas_dot(long size, real *x, long xStride, real *y, long yStride)
{
  long k;
  real acc = 0;

  /* A single element makes the strides irrelevant; normalise them. */
  if(size == 1)
  {
    xStride = 1;
    yStride = 1;
  }

#if USE_CBLAS
  if( !((size >= INT_MAX) || (xStride >= INT_MAX) || (yStride >= INT_MAX)) )
  {
#ifdef USE_DOUBLE
    return cblas_ddot(size, x, xStride, y, yStride);
#else
    return cblas_sdot(size, x, xStride, y, yStride);
#endif
  }
#endif

  /* Fallback: straightforward accumulation. */
  for(k = 0; k < size; k++)
    acc += x[k*xStride]*y[k*yStride];

  return acc;
}
value_t<A> dot(const A& a, const B& b) { if constexpr (all_dma<A, B>) { a.ensure_cpu_up_to_date(); b.ensure_cpu_up_to_date(); if constexpr (all_single_precision<A, B>) { return cblas_sdot(etl::size(a), a.memory_start(), 1, b.memory_start(), 1); } else { return cblas_ddot(etl::size(a), a.memory_start(), 1, b.memory_start(), 1);
/*
 * JNI bridge for cblas_sdot on two direct buffers.
 *
 * X and Y are resolved to native addresses with GetDirectBufferAddress;
 * offsetX/offsetY are element offsets into them and incX/incY the strides.
 * GetDirectBufferAddress returns NULL when the object is not a direct buffer
 * (or direct access is unsupported); in that case an
 * IllegalArgumentException is raised and 0 is returned instead of
 * dereferencing a NULL pointer.
 */
JNIEXPORT jfloat JNICALL Java_uncomplicate_neanderthal_CBLAS_sdot
(JNIEnv *env, jclass clazz, jint N,
 jobject X, jint offsetX, jint incX,
 jobject Y, jint offsetY, jint incY) {

  float *cX = (float *) (*env)->GetDirectBufferAddress(env, X);
  float *cY = (float *) (*env)->GetDirectBufferAddress(env, Y);

  if (cX == NULL || cY == NULL) {
    jclass iae = (*env)->FindClass(env, "java/lang/IllegalArgumentException");
    if (iae != NULL) {
      (*env)->ThrowNew(env, iae, "sdot: X and Y must be direct buffers");
    }
    return 0.0f;
  }

  return cblas_sdot(N, cX + offsetX, incX, cY + offsetY, incY);
}
// Computes the velocity vector of one frame: the dot products of the channel
// samples with the channel abscissas and ordinates, each divided by the sum
// of the samples. outputs[0] receives the abscissa, outputs[1] the ordinate;
// both are 0 when the sum of the samples is 0.
void Vector::processVelocity(const float* inputs, float* outputs)
{
    float total = 0.f;
    for(int ch = 0; ch < m_number_of_channels; ch++)
    {
        total += inputs[ch];
    }

    const float abscissa = cblas_sdot(m_number_of_channels, inputs, 1, m_channels_abscissa_float, 1);
    const float ordinate = cblas_sdot(m_number_of_channels, inputs, 1, m_channels_ordinate_float, 1);

    if(!total)
    {
        outputs[0] = 0.f;
        outputs[1] = 0.f;
        return;
    }

    outputs[0] = abscissa / total;
    outputs[1] = ordinate / total;
}
// Returns the dot product of this vector with `x`.
//
// `x` must actually be a HostVector<float> of the same size; both conditions
// are checked with assert. No null check is made on `x` itself: it is a
// reference, so comparing its address against NULL is always true and only
// triggers compiler warnings.
float HostVector<float>::Dot(const BaseVector<float> &x) const {

  const HostVector<float> *cast_x = dynamic_cast<const HostVector<float>*> (&x);

  assert(cast_x != NULL);
  assert(this->size_ == cast_x->size_);

  return cblas_sdot(this->size_, this->vec_, 1, cast_x->vec_, 1);

}
/*
 * Class:     com_intel_analytics_bigdl_mkl_MKL
 * Method:    vsdot
 * Signature: (I[FII[FII)F
 *
 * Dot product of n elements of x (starting at xOffset, stride incx) and y
 * (starting at yOffset, stride incy), computed with cblas_sdot.
 *
 * Returns 0 if either array cannot be pinned (GetPrimitiveArrayCritical
 * returned NULL). Both arrays are only read, so they are released with
 * JNI_ABORT, which skips any copy-back.
 */
JNIEXPORT float JNICALL Java_com_intel_analytics_bigdl_mkl_MKL_vsdot
   (JNIEnv * env, jclass cls, jint n, jfloatArray x, jint xOffset, jint incx,
    jfloatArray y, jint yOffset, jint incy) {

  float res = 0.0f;
  jfloat * jni_x;
  jfloat * jni_y;

  /* The third argument is an optional jboolean* (isCopy); NULL means "don't care". */
  jni_x = (*env)->GetPrimitiveArrayCritical(env, x, NULL);
  if (jni_x == NULL) {
    return res;
  }

  jni_y = (*env)->GetPrimitiveArrayCritical(env, y, NULL);
  if (jni_y != NULL) {
    res = cblas_sdot(n, jni_x + xOffset, incx, jni_y + yOffset, incy);
    (*env)->ReleasePrimitiveArrayCritical(env, y, jni_y, JNI_ABORT);
  }

  (*env)->ReleasePrimitiveArrayCritical(env, x, jni_x, JNI_ABORT);
  return res;
}
static void dotprod(const RView& X, const RView& Y, T& result) { #ifdef MKL const float * x = X.data(); const float * y = Y.data(); result = cblas_sdot(DIM_N, x, 1, y, 1); #else for (int i=0; i<DIM_N; ++i) result += X[i] * Y[i]; #endif }
/*
 * Forward pass only: evaluates the network on the sparse input `v` and
 * returns the output activation x2. The intermediate buffers a1, x1, g1 and
 * the scalars a2, x2, g2 of `n` are overwritten.
 */
float value(nnet_t* n, sparse_t* v){
  int k = 0;

  /* Hidden pre-activation: a1 = b1 + sum_k x[k] * W1[idx[k]] */
  cblas_scopy(n->hidden, n->b1, 1, n->a1, 1);
  while(k < v->nz){
    cblas_saxpy(n->hidden, v->x[k], n->W1[v->idx[k]], 1, n->a1, 1);
    k++;
  }
  activation(n->a1, n->x1, n->g1, n->hidden);

  /* Output unit: a2 = b2 + W2 . x1 */
  n->a2 = n->b2;
  n->a2 += cblas_sdot(n->hidden, n->W2, 1, n->x1, 1);
  activation(&n->a2, &n->x2, &n->g2, 1);

  return n->x2;
}
/*
 * Single-precision dot product of x and y, written to *rho.
 * Uses the CBLAS interface when BLIS_ENABLE_CBLAS_INTERFACES is defined and
 * the Fortran-77 interface otherwise. `conj` is not used by this routine.
 */
void bli_sdot( conj_t conj, int n, float* x, int incx, float* y, int incy, float* rho )
{
	float dot_value;

#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	dot_value = cblas_sdot( n, x, incx, y, incy );
#else
	dot_value = F77_sdot( &n, x, &incx, y, &incy );
#endif

	*rho = dot_value;
}
/*
 * JNI bridge: unit-stride dot product of N elements of jX starting at startX
 * and jY starting at startY, computed with cblas_sdot.
 *
 * Returns 0 if either array cannot be pinned (GetPrimitiveArrayCritical
 * returned NULL). Both arrays are only read, so they are released with
 * JNI_ABORT, which skips any copy-back.
 */
JNIEXPORT jfloat JNICALL Java_edu_berkeley_bid_CBLAS_sdotxx
(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jX, jint startX, jfloatArray jY, jint startY){
	jfloat returnValue = 0.0f;
	jfloat * X;
	jfloat * Y;

	/* The third argument is an optional jboolean* (isCopy); NULL means "don't care". */
	X = (*env)->GetPrimitiveArrayCritical(env, jX, NULL);
	if (X == NULL) {
		return returnValue;
	}

	Y = (*env)->GetPrimitiveArrayCritical(env, jY, NULL);
	if (Y != NULL) {
		returnValue = cblas_sdot(N, X+startX, 1, Y+startY, 1);
		(*env)->ReleasePrimitiveArrayCritical(env, jY, Y, JNI_ABORT);
	}

	(*env)->ReleasePrimitiveArrayCritical(env, jX, X, JNI_ABORT);
	return returnValue;
}
value_t<A> sum(const A& a) { if constexpr (is_dma<A>) { etl::dyn_vector<value_t<A>> ones(etl::size(a)); ones = value_t<A>(1); a.ensure_cpu_up_to_date(); [[maybe_unused]] const auto* m_a = a.memory_start(); [[maybe_unused]] const auto* m_b = ones.memory_start(); if constexpr (is_single_precision<A>) { return cblas_sdot(etl::size(a), m_a, 1, m_b, 1); } else if constexpr (is_double_precision<A>) {
// Computes the energy vector of one frame: the channel samples are squared
// into m_channels_float, then their dot products with the channel abscissas
// and ordinates are divided by the sum of the squares. outputs[0] receives
// the abscissa, outputs[1] the ordinate; both are 0 when that sum is 0.
void Vector::processEnergy(const float* inputs, float* outputs)
{
    // Square every sample in the scratch buffer.
    cblas_scopy(m_number_of_channels, inputs, 1, m_channels_float, 1);
    for(int ch = 0; ch < m_number_of_channels; ch++)
    {
        m_channels_float[ch] *= m_channels_float[ch];
    }

    const float total = cblas_sasum(m_number_of_channels, m_channels_float, 1);
    const float abscissa = cblas_sdot(m_number_of_channels, m_channels_float, 1, m_channels_abscissa_float, 1);
    const float ordinate = cblas_sdot(m_number_of_channels, m_channels_float, 1, m_channels_ordinate_float, 1);

    if(!total)
    {
        outputs[0] = 0.f;
        outputs[1] = 0.f;
        return;
    }

    outputs[0] = abscissa / total;
    outputs[1] = ordinate / total;
}
// Single-precision overload: forwards the fields of `p` (length, the two
// vectors and their increments) to cblas_sdot and returns the result.
inline float
dot_call( dot_params< float >& p )
{
    //std::cout << "calling blas sdot (single precision) " << std::endl;
    return cblas_sdot( p.n, p.x, p.inc_x, p.y, p.inc_y );
}
/*
 * Float dot-product kernel.
 *
 * stridea/strideb are byte strides. They are converted to element strides;
 * cblas_sdot is used only when both byte strides are exact multiples of
 * sizeof(float) and both element strides are non-negative. In every other
 * case the call is delegated to the previously registered function in
 * oldFunctions[PyArray_FLOAT].
 */
static void
FLOAT_dot(void *a, intp stridea, void *b, intp strideb, void *res, intp n, void *tmp)
{
    int na = stridea / sizeof(float);
    int nb = strideb / sizeof(float);

    int blas_usable = (sizeof(float) * na == stridea) &&
                      (sizeof(float) * nb == strideb) &&
                      (na >= 0) && (nb >= 0);

    if (!blas_usable) {
        oldFunctions[PyArray_FLOAT](a, stridea, b, strideb, res, n, tmp);
        return;
    }

    *((float *)res) = cblas_sdot((int)n, (float *)a, na, (float *)b, nb);
}
/*
 * JNI bridge: for each column i in [0, ncols) stores in jZ[i] the unit-stride
 * dot product of nrows elements of jX starting at i*ldx and jY starting at
 * i*ldy.
 *
 * Does nothing if any of the arrays cannot be pinned
 * (GetPrimitiveArrayCritical returned NULL). jX and jY are only read, so they
 * are released with JNI_ABORT; jZ is released with mode 0 so that results are
 * copied back when the VM handed out a copy.
 */
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_sdotm
(JNIEnv * env, jobject calling_obj, jint nrows, jint ncols, jfloatArray jX, jint ldx, jfloatArray jY, jint ldy, jfloatArray jZ){
	jfloat * X;
	jfloat * Y;
	jfloat * Z;
	int i;

	/* The third argument is an optional jboolean* (isCopy); NULL means "don't care". */
	X = (*env)->GetPrimitiveArrayCritical(env, jX, NULL);
	if (X == NULL) {
		return;
	}

	Y = (*env)->GetPrimitiveArrayCritical(env, jY, NULL);
	if (Y != NULL) {
		Z = (*env)->GetPrimitiveArrayCritical(env, jZ, NULL);
		if (Z != NULL) {
			for (i = 0; i < ncols; i++) {
				Z[i] = cblas_sdot(nrows, X+i*ldx, 1, Y+i*ldy, 1);
			}
			(*env)->ReleasePrimitiveArrayCritical(env, jZ, Z, 0);
		}
		(*env)->ReleasePrimitiveArrayCritical(env, jY, Y, JNI_ABORT);
	}

	(*env)->ReleasePrimitiveArrayCritical(env, jX, X, JNI_ABORT);
}
// Computes the inner product of x and y into `inner`.
//
// Returns true on success. Returns false, leaving `inner` untouched, when the
// two vectors differ in size. x and y are references and therefore always
// refer to an object, so no null check is performed on them.
bool dot(REAL &inner, const RealVector &x, const RealVector &y)
{
    if (x.size != y.size)
    {
        return false;
    }

    const UINT N = x.size;
    const INT incX = 1;
    const INT incY = 1;

    inner = cblas_sdot(N, x.M, incX, y.M, incY);
    return true;
}
/*
 * Applies a symmetric rank-2 update to the lower triangle of A built from the
 * vector V and the scalar *TAU:
 *     X = tau * A * V
 *     W = X - 1/2 * (X'V) * tau * V
 *     A = A - W*V' - V*W'
 * WORK (workspace) float array, dimension N; it holds X and then W.
 */
inline static void
magma_slarfxsym_v2(magma_int_t n, float *A, magma_int_t lda, float *V, float *TAU, float *work)
{
    magma_int_t unit_stride = 1;
    float zero    = MAGMA_S_ZERO;
    float neg_one = MAGMA_S_NEG_ONE;
    float half    = MAGMA_S_HALF;
    float xv;

    /* work = X = tau * A * V */
    blasf77_ssymv("L", &n, TAU, A, &lda, V, &unit_stride, &zero, work, &unit_stride);

    /* xv = X' * V */
#if defined(PRECISION_z) || defined(PRECISION_c)
    xv = zero;
    for (magma_int_t j = 0; j < n; j++)
        xv = xv + MAGMA_S_CNJG(work[j]) * V[j];
    //cblas_sdot_sub(n, work, ione, V, ione, &dtmp);
#else
    xv = cblas_sdot(n, work, unit_stride, V, unit_stride);
#endif

    /* xv = -1/2 * X'V * tau */
    xv = -xv * half * (*TAU);

    /* work = W = X + xv * V */
    blasf77_saxpy(&n, &xv, V, &unit_stride, work, &unit_stride);

    /* A := -W*V' - V*W' + A (symmetric rank-2 update, lower triangle) */
    blasf77_ssyr2("L", &n, &neg_one, work, &unit_stride, V, &unit_stride, A, &lda);
}
typename TypeTraits<typename Vector::ScalarType>::magnitude_type dot(const Vector& x, const Vector& y) { int n = x.coefs.size(); #ifdef MINIFE_DEBUG if (y.local_size < n) { std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<<std::endl; n = y.local_size; } #endif typedef typename Vector::ScalarType Scalar; typedef typename TypeTraits<typename Vector::ScalarType>::magnitude_type magnitude; const Scalar* xcoefs = &x.coefs[0]; const Scalar* ycoefs = &y.coefs[0]; #if defined(MINIFE_MKL_DOUBLE) magnitude result = cblas_ddot( #elif defined(MINIFE_MKL_FLOAT) magnitude result = cblas_sdot( #else #error Unknown MINIFE_SCALAR type. #endif (MKL_INT) n, xcoefs, (MKL_INT) 1, ycoefs, (MKL_INT) 1); #ifdef HAVE_MPI magnitude local_dot = result, global_dot = 0; MPI_Datatype mpi_dtype = TypeTraits<magnitude>::mpi_type(); MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); return global_dot; #else return result; #endif }
//#define NUMERIC_PartialDeriv_CALC void BuildMatrix(BkgFitMatrixPacker *fit,bool accum, bool debug) { #if 1 (void) debug; fit->BuildMatrix(accum); #else mat_assembly_instruction *pinst = fit->instList; int lineInc=0; // build JTJ and RHS matricies for (int i=0; i < fit->nInstr; i++) { double sum=0.0; for (int j=0; j < pinst->cnt; j++) sum += cblas_sdot(pinst->si[j].len,pinst->si[j].src1,1,pinst->si[j].src2,1); if (accum) * (pinst->dst) += sum; else * (pinst->dst) = sum; if (debug) { char *src1Name = findName(pinst->si[0].src1); char *src2Name = findName(pinst->si[0].src2); printf("%d(%s--%s)(%lf) ",i,src1Name,src2Name,* (pinst->dst)); if (lineInc++ > 6) { printf("\n "); lineInc = 0; } } pinst++; } #endif }
/*
 * Checks the CBLAS dot-product routines against fixed expected values.
 *
 * Every case uses N = 1 with one combination of incX/incY in {1, -1}, calls
 * one routine (sdsdot, sdot, ddot, cdotu_sub, cdotc_sub, zdotu_sub or
 * zdotc_sub) and compares the result with gsl_test_rel. Single-precision
 * results are compared with tolerance flteps, double-precision results with
 * dbleps. Complex results are stored as {real, imag} pairs and each part is
 * checked separately.
 */
void test_dot (void) {
const double flteps = 1e-4, dbleps = 1e-6;
  /* cases 1-9: sdsdot, three alpha values for each of three increment combinations */
  { int N = 1; float alpha = 0.0f; float X[] = { 0.733f }; float Y[] = { 0.825f }; int incX = 1; int incY = -1; float expected = 0.604725f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 1)"); };
  { int N = 1; float alpha = 0.1f; float X[] = { 0.733f }; float Y[] = { 0.825f }; int incX = 1; int incY = -1; float expected = 0.704725f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 2)"); };
  { int N = 1; float alpha = 1.0f; float X[] = { 0.733f }; float Y[] = { 0.825f }; int incX = 1; int incY = -1; float expected = 1.604725f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 3)"); };
  { int N = 1; float alpha = 0.0f; float X[] = { -0.812f }; float Y[] = { -0.667f }; int incX = -1; int incY = 1; float expected = 0.541604f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 4)"); };
  { int N = 1; float alpha = 0.1f; float X[] = { -0.812f }; float Y[] = { -0.667f }; int incX = -1; int incY = 1; float expected = 0.641604f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 5)"); };
  { int N = 1; float alpha = 1.0f; float X[] = { -0.812f }; float Y[] = { -0.667f }; int incX = -1; int incY = 1; float expected = 1.541604f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 6)"); };
  { int N = 1; float alpha = 0.0f; float X[] = { 0.481f }; float Y[] = { 0.523f }; int incX = -1; int incY = -1; float expected = 0.251563f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 7)"); };
  { int N = 1; float alpha = 0.1f; float X[] = { 0.481f }; float Y[] = { 0.523f }; int incX = -1; int incY = -1; float expected = 0.351563f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 8)"); };
  { int N = 1; float alpha = 1.0f; float X[] = { 0.481f }; float Y[] = { 0.523f }; int incX = -1; int incY = -1; float expected = 1.251563f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 9)"); };
  /* cases 10-15: incX = 1, incY = -1 */
  { int N = 1; float X[] = { 0.785f }; float Y[] = { -0.7f }; int incX = 1; int incY = -1; float expected = -0.5495f; float f; f = cblas_sdot(N, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdot(case 10)"); };
  { int N = 1; double X[] = { 0.79 }; double Y[] = { -0.679 }; int incX = 1; int incY = -1; double expected = -0.53641; double f; f = cblas_ddot(N, X, incX, Y, incY); gsl_test_rel(f, expected, dbleps, "ddot(case 11)"); };
  { int N = 1; float X[] = { 0.474f, -0.27f }; float Y[] = { -0.144f, -0.392f }; int incX = 1; int incY = -1; float expected[2] = {-0.174096f, -0.146928f}; float f[2]; cblas_cdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 12) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 12) imag"); };
  { int N = 1; float X[] = { 0.474f, -0.27f }; float Y[] = { -0.144f, -0.392f }; int incX = 1; int incY = -1; float expected[2] = {0.037584f, -0.224688f}; float f[2]; cblas_cdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 13) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 13) imag"); };
  { int N = 1; double X[] = { -0.87, -0.631 }; double Y[] = { -0.7, -0.224 }; int incX = 1; int incY = -1; double expected[2] = {0.467656, 0.63658}; double f[2]; cblas_zdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 14) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 14) imag"); };
  { int N = 1; double X[] = { -0.87, -0.631 }; double Y[] = { -0.7, -0.224 }; int incX = 1; int incY = -1; double expected[2] = {0.750344, -0.24682}; double f[2]; cblas_zdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 15) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 15) imag"); };
  /* cases 16-21: incX = -1, incY = 1 */
  { int N = 1; float X[] = { -0.457f }; float Y[] = { 0.839f }; int incX = -1; int incY = 1; float expected = -0.383423f; float f; f = cblas_sdot(N, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdot(case 16)"); };
  { int N = 1; double X[] = { 0.949 }; double Y[] = { -0.873 }; int incX = -1; int incY = 1; double expected = -0.828477; double f; f = cblas_ddot(N, X, incX, Y, incY); gsl_test_rel(f, expected, dbleps, "ddot(case 17)"); };
  { int N = 1; float X[] = { 0.852f, -0.045f }; float Y[] = { 0.626f, -0.164f }; int incX = -1; int incY = 1; float expected[2] = {0.525972f, -0.167898f}; float f[2]; cblas_cdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 18) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 18) imag"); };
  { int N = 1; float X[] = { 0.852f, -0.045f }; float Y[] = { 0.626f, -0.164f }; int incX = -1; int incY = 1; float expected[2] = {0.540732f, -0.111558f}; float f[2]; cblas_cdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 19) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 19) imag"); };
  { int N = 1; double X[] = { -0.786, -0.341 }; double Y[] = { -0.271, -0.896 }; int incX = -1; int incY = 1; double expected[2] = {-0.09253, 0.796667}; double f[2]; cblas_zdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 20) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 20) imag"); };
  { int N = 1; double X[] = { -0.786, -0.341 }; double Y[] = { -0.271, -0.896 }; int incX = -1; int incY = 1; double expected[2] = {0.518542, 0.611845}; double f[2]; cblas_zdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 21) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 21) imag"); };
  /* cases 22-27: incX = -1, incY = -1 */
  { int N = 1; float X[] = { -0.088f }; float Y[] = { -0.165f }; int incX = -1; int incY = -1; float expected = 0.01452f; float f; f = cblas_sdot(N, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdot(case 22)"); };
  { int N = 1; double X[] = { -0.434 }; double Y[] = { -0.402 }; int incX = -1; int incY = -1; double expected = 0.174468; double f; f = cblas_ddot(N, X, incX, Y, incY); gsl_test_rel(f, expected, dbleps, "ddot(case 23)"); };
  { int N = 1; float X[] = { -0.347f, 0.899f }; float Y[] = { -0.113f, -0.858f }; int incX = -1; int incY = -1; float expected[2] = {0.810553f, 0.196139f}; float f[2]; cblas_cdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 24) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 24) imag"); };
  { int N = 1; float X[] = { -0.347f, 0.899f }; float Y[] = { -0.113f, -0.858f }; int incX = -1; int incY = -1; float expected[2] = {-0.732131f, 0.399313f}; float f[2]; cblas_cdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 25) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 25) imag"); };
  { int N = 1; double X[] = { -0.897, -0.204 }; double Y[] = { -0.759, 0.557 }; int incX = -1; int incY = -1; double expected[2] = {0.794451, -0.344793}; double f[2]; cblas_zdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 26) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 26) imag"); };
  { int N = 1; double X[] = { -0.897, -0.204 }; double Y[] = { -0.759, 0.557 }; int incX = -1; int incY = -1; double expected[2] = {0.567195, -0.654465}; double f[2]; cblas_zdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 27) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 27) imag"); };
}
// Single-precision strided dot product: n elements of x (stride incx) and
// y (stride incy), forwarded to cblas_sdot.
float caffe_cpu_strided_dot<float>(const int n, const float* x, const int incx,
    const float* y, const int incy) {
  const float dot = cblas_sdot(n, x, incx, y, incy);
  return dot;
}
/*
 * Fortran-style entry point for the single-precision dot product: every
 * scalar argument arrives by pointer and is dereferenced before the call is
 * forwarded to cblas_sdot.
 */
static inline ffloat
sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy)
{
    const float dot = cblas_sdot(*n, x, *incx, y, *incy);
    return dot;
}
/*
 * Plain pass-through to cblas_sdot: dot product of N elements of X (stride
 * incX) and Y (stride incY).
 */
float wrapper_cblas_sdot(const int N, const float *X, const int incX,
                         const float *Y, const int incY)
{
    const float dot = cblas_sdot(N, X, incX, Y, incY);
    return dot;
}
static void master(int nslaves, char* parameterFile) { //VARIABLE DECLARATIONS time_t startTime, computationStartTime, endTime; int rank, i, j, accel, MAX_ITER, *slave_ldAs, total_ldA, rdA, numLambdas, error; ISTAinstance_mpi* instance; float *xvalue, *result, *b, *lambdas, lambdaStart, lambdaFinish, gamma, step, MIN_FUNCDIFF; char regType, xfilename[MAX_FILENAME_SIZE], bfilename[MAX_FILENAME_SIZE], outfilename[MAX_FILENAME_SIZE]; //START TIMER startTime = time(NULL); //GET VALUES FROM PARAMETER FILE getMasterParams(parameterFile, xfilename, bfilename, outfilename, &rdA, &numLambdas, &lambdaStart, &lambdaFinish, &gamma, &step, ®Type, &accel, &MAX_ITER, &MIN_FUNCDIFF); //STORE EACH SLAVE'S INDIVIDUAL LDA AND CALCULATE TOTAL_LDA slave_ldAs = (int*)malloc((nslaves+1)*sizeof(int)); int my_ldA = 0; MPI_Gather(&my_ldA, 1, MPI_INT, slave_ldAs, 1, MPI_INT, 0, MPI_COMM_WORLD); total_ldA = 0; for(i=0; i<=nslaves; i++) total_ldA += slave_ldAs[i]; fprintf(stdout, "TOTAL LDA IS %d\n", total_ldA); //ALLOCATE MEMORY xvalue = calloc(rdA+1,sizeof(float)); result = malloc((total_ldA+rdA)*sizeof(float)); b = malloc((total_ldA)*sizeof(float)); lambdas = malloc(numLambdas*sizeof(float)); if(xvalue==NULL || result==NULL || b==NULL || lambdas==NULL) fprintf(stdout,"Unable to allocate memory!"); //ASSIGN VALUES TO XVALUE AND B error=1; if(strcmp(xfilename, "zeros")==0){ //do nothing - calloc already initialized xvalue to 0 } else error *= getVector(xvalue, rdA, xfilename); error *= getVector(b, total_ldA, bfilename); //CHECK FOR FILEOPEN ERRORS; IF ANY PRESENT END PROGRAM for(i=1; i<=nslaves; i++) if(slave_ldAs[i] == -1) error=0; MPI_Bcast(&error, 1, MPI_INT, 0, MPI_COMM_WORLD); if(error==0) { free(result); free(xvalue); free(b); free(lambdas); return; } //PRINT INPUTS /* fprintf(stdout, "Here's x:\n"); for(i=0; i < rdA; i++) { fprintf(stdout, "%f ", xvalue[i]); } fprintf(stdout, "\n and here's b:\n"); for(i=0; i < total_ldA; i++) { fprintf(stdout, "%f ", b[i]); } */ //CREATE ISTA OBJECT 
instance = ISTAinstance_mpi_new(slave_ldAs, total_ldA, rdA, b, lambdaStart, gamma, accel, regType, xvalue, step, nslaves, MPI_COMM_WORLD, TAG_AX, TAG_ATX, TAG_ATAX, TAG_DIE); //CENTER FEATURES float* shifts = calloc(rdA, sizeof(float)); MPI_Reduce(shifts, instance->meanShifts, rdA, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); cblas_sscal(rdA, 1.0 / total_ldA, instance->meanShifts, 1); MPI_Bcast(instance->meanShifts, rdA, MPI_FLOAT, 0, MPI_COMM_WORLD); //SCALE FEATURES float* norms = calloc(rdA, sizeof(float)); MPI_Reduce(norms, instance->scalingFactors, rdA, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); for(j=0; j<rdA; j++) instance->scalingFactors[j] = pow(instance->scalingFactors[j], 0.5); MPI_Bcast(instance->scalingFactors, rdA, MPI_FLOAT, 0, MPI_COMM_WORLD); //CREATE LAMBDA PATH calcLambdas(lambdas, numLambdas, lambdaStart, lambdaFinish, instance); //DEBUGGING AREA /*float* ones = calloc(rdA+1, sizeof(float)); for(i=0; i< rdA+1; i++) { ones[i] = 1.0; } fprintf(stdout, "meanshifts: "); for(i=0; i<5; i++) { fprintf(stdout, "%f ", instance->meanShifts[i]); } fprintf(stdout, "\nscalingFactors: "); for(i=0; i<5; i++) { fprintf(stdout, "%f ", instance->scalingFactors[i]); } fprintf(stdout, "\nlambdas: "); for(i=0; i<5; i++) { fprintf(stdout, "%f ", lambdas[i]); } fprintf(stdout, "\nA * ones: "); multiply_Ax(ones, result, instance); for(i=0; i<5; i++) { fprintf(stdout, "%f ", result[i]); } fprintf(stdout, "\n"); */ //TIME UPDATE computationStartTime = time(NULL); //RUN ISTA for(j=0; j < numLambdas; j++) { instance->lambda = lambdas[j]; ISTAsolve_lite(instance, MAX_ITER, MIN_FUNCDIFF); fprintf(stdout, "\n"); } //UNDO RESCALING for(i=0; i<(instance->rdA); i++) { if(instance->scalingFactors[i] > 0.0001) instance->xcurrent[i] = instance->xcurrent[i] / instance->scalingFactors[i]; } instance->intercept = -1.0 * cblas_sdot(instance->rdA, instance->xcurrent, 1, instance->meanShifts, 1); //WRITE RESULTS writeResults(instance, outfilename, bfilename, lambdas[numLambdas-1]); //STOP TIME 
endTime = time(NULL); fprintf(stdout,"Setup took %f seconds and computation took %f seconds\n", difftime(computationStartTime, startTime), difftime(endTime, computationStartTime)); //CLOSE THE SLAVE PROCESSES AND FREE MEMORY fprintf(stdout, "Closing the program\n"); for(rank=1; rank <= nslaves; rank++) { MPI_Send(0, 0, MPI_INT, rank, TAG_DIE, MPI_COMM_WORLD); } free(result); ISTAinstance_mpi_free(instance); free(shifts); free(norms); free(lambdas); return; }
// Single-precision dot product of n contiguous elements of x and y
// (both strides are 1), forwarded to cblas_sdot.
float caffe_cpu_dot<float>(const int n, const float* x, const float* y) {
  const int unit_stride = 1;
  return cblas_sdot(n, x, unit_stride, y, unit_stride);
}