Example #1
template <typename T>
void Householder<T>::evalHHmatrixData(const NDArray<T>& x, NDArray<T>& tail, T& coeff, T& normX) {

	// input validation
	if(!x.isVector() && !x.isScalar())
		throw "ops::helpers::Householder::evalHHmatrixData method: input array must be vector or scalar!";

	if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1)
		throw "ops::helpers::Householder::evalHHmatrixData method: input tail vector must have length less than unity compared to input x vector!";

	normX = x.template reduceNumber<simdOps::Norm2<T>>();	
	const T min = DataTypeUtils::min<T>();	
		
	if(normX*normX - x(0)*x(0) <= min) {				// x already has a (near) zero tail, no reflection needed

		normX = x(0);
		coeff = (T)0.;		
		tail = (T)0.;		
	}
	else {
		
		if(x(0) >= (T)0.)
			normX = -normX;									// choose opposite sign to lessen roundoff error
		
		T u0 = x(0) - normX;
		coeff = -u0 / normX;							// coeff = 2 / (v^T * v) for the reflector vector v = [1, tail]

		if(x.isRowVector())
			tail.assign(x({{}, {1, -1}}) / u0);		
		else
			tail.assign(x({{1, -1}, {}}) / u0);		
	}		
}
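
For reference, a minimal standalone sketch of the same construction, using plain std::vector instead of NDArray (illustrative only, with an arbitrary input vector; the names normX, coeff and tail mirror the code above). It reproduces the non-degenerate branch and checks that the implied reflector zeroes everything below the first entry of x:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    std::vector<double> x = {3.0, 1.0, 2.0};             // arbitrary example input

    double normX = 0.0;                                  // Euclidean norm of x
    for (double v : x) normX += v * v;
    normX = std::sqrt(normX);
    if (x[0] >= 0.0) normX = -normX;                     // opposite sign, as in the code above

    const double u0    = x[0] - normX;
    const double coeff = -u0 / normX;

    std::vector<double> tail(x.size() - 1);              // full reflector vector is v = [1, tail]
    for (size_t i = 1; i < x.size(); ++i) tail[i - 1] = x[i] / u0;

    // apply H = I - coeff * v * v^T to x: H*x = x - coeff * (v . x) * v
    std::vector<double> v(x.size(), 1.0);
    for (size_t i = 1; i < x.size(); ++i) v[i] = tail[i - 1];
    double vdotx = 0.0;
    for (size_t i = 0; i < x.size(); ++i) vdotx += v[i] * x[i];
    for (size_t i = 0; i < x.size(); ++i) x[i] -= coeff * vdotx * v[i];

    // expected: x[0] == normX, all remaining entries ~0
    std::printf("normX = %g, Hx = [%g, %g, %g]\n", normX, x[0], x[1], x[2]);
    return 0;
}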
Example #2
template <typename T>
NDArray<T> Householder<T>::evalHHmatrix(const NDArray<T>& x) {

	// input validation
	if(!x.isVector() && !x.isScalar())
		throw "ops::helpers::Householder::evalHHmatrix method: input array must be vector or scalar!";

	NDArray<T> w((int)x.lengthOf(), 1,  x.ordering(), x.getWorkspace());							// column-vector
	NDArray<T> wT(1, (int)x.lengthOf(), x.ordering(), x.getWorkspace());							// row-vector (transposed w)	

	T coeff;
	T normX = x.template reduceNumber<simdOps::Norm2<T>>();	
	const T min = DataTypeUtils::min<T>();
	
	if(normX*normX - x(0)*x(0) <= min) {

		normX = x(0); 
		coeff = (T)0.;		
		w = (T)0.;
		
	} 	
	else {
		
		if(x(0) >= (T)0.)
			normX = -normX;									// choose opposite sign to lessen roundoff error
		
		T u0 = x(0) - normX;
		coeff = -u0 / normX;				
		w.assign(x / u0);		
	}
	
	w(0) = (T)1.;
	wT.assign(&w);
	
	NDArray<T> identity((int)x.lengthOf(), (int)x.lengthOf(), x.ordering(), x.getWorkspace());					 
	identity.setIdentity();																			// identity matrix	

	return identity - mmul(w, wT) * coeff;					// H = I - coeff * w * w^T

}
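
In the notation of the code above (w is the reflector vector with w(0) = 1, coeff the scalar computed in the else-branch), the returned matrix is the standard Householder reflector. The following identities are ordinary Householder algebra rather than something stated in the source:

\[
H = I - \mathrm{coeff}\; w w^{T}, \qquad
\mathrm{coeff} = \frac{\mathrm{normX} - x_0}{\mathrm{normX}} = \frac{2}{w^{T} w}, \qquad
H\,x = \mathrm{normX}\; e_1 ,
\]

where normX = ±‖x‖₂ with the sign chosen opposite to x(0), exactly as in the round-off comment above.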
Example #3
template <typename T>
void Householder<T>::evalHHmatrixDataI(const NDArray<T>& x, T& coeff, T& normX) {

	int rows = (int)x.lengthOf()-1;
	int num = 1;
	
	if(rows == 0) {
		rows = 1;
		num = 0;
	}	
	
	NDArray<T> tail(rows, 1, x.ordering(), x.getWorkspace());
	evalHHmatrixData(x, tail, coeff, normX);

	// write the computed tail back into x in place, through a subarray view over x's buffer
	if(x.isRowVector()) {
		NDArray<T>* temp = x.subarray({{}, {num, x.sizeAt(1)}});
		temp->assign(tail);
		delete temp;
	}
	else {		
		NDArray<T>* temp = x.subarray({{num, x.sizeAt(0)}, {}});
		temp->assign(tail);
		delete temp;
	}
}
Example #4
            template <typename T>
            void skipgramBatchExec_(NDArray &s0, NDArray &s1, NDArray &s1n, void *vexpTable, void *vnegTable, void *vinfVector, NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom, const int nsRounds, const int vocabSize, const int vectorLength, const int expLength, const int negLength, const bool preciseMode, const int numThreads) {
                //auto syn0 = reinterpret_cast<T*>(vsyn0);
                //auto syn1 = reinterpret_cast<T*>(vsyn1);
                //auto syn1Neg = reinterpret_cast<T*>(vsyn1Neg);
                const auto expTable = reinterpret_cast<T*>(vexpTable);
                const auto negTable = reinterpret_cast<T*>(vnegTable);
                const auto infVector = reinterpret_cast<T*>(vinfVector);

                T sneu1e[600];                  // stack buffer for the gradient accumulator, used when vectorLength <= 600

                //const auto numThreads = omp_get_max_threads();
                const auto idxShift = indices.isEmpty() ? 0 : indices.sizeAt(1);
                const auto hsRounds = codes.isEmpty() ? 0 : codes.sizeAt(1);

                    // regular mode provides no reproducibility guarantees
                    auto numTargets = targets.lengthOf();
                    auto bTarget = targets.bufferAsT<int>();
                    auto bIndices = indices.bufferAsT<int>();
                    auto bCodes = codes.bufferAsT<int8_t>();

#pragma omp parallel for num_threads(numThreads) private(sneu1e) default(shared) schedule(static)
                    for (int t = 0; t < numTargets; t++) {
                        T* neu1e = vectorLength <= 600 ? sneu1e : new T[vectorLength];
                        memset(neu1e, 0, vectorLength * sizeof(T));

                        auto target = bTarget[t];
                        auto alpha = lr.e<double>(t);
                        unsigned long long randomValue = nextRandom.e<Nd4jLong>(t);

                        auto syn0row = reinterpret_cast<T*>(s0.bufferWithOffset(target * vectorLength));

                        if (hsRounds > 0) {
                            int irow = 0;
                            auto cShift = t * idxShift;

                            for (int e = 0; e < hsRounds; e++) {
                                irow = bIndices[e + cShift];
                                if (irow < 0 || irow >= vocabSize)
                                    continue;

                                auto syn1row = s1.bufferWithOffset(irow * vectorLength);
                                auto code = bCodes[e + cShift];

                                    //nd4j_printf("syn0: [%i]; syn1: [%i]; code: [%i]\n", target, irow, code);
                                hSoftmax_<T>(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, code, expLength, false);
                            }
                        }


                        if (nsRounds > 0) {
                            int irow = negStarters.e<int>(t);
                            int nsStarter = irow;
                            for (int r = 0; r < nsRounds + 1; r++) {
                                if (r == 0) {
                                    // target is known in advance
                                } else {
                                    randomValue = randomValue * (unsigned long long) 25214903917 + 11;      // same linear congruential update as the original word2vec sampler
                                    auto idx = nd4j::math::nd4j_abs<Nd4jLong >((randomValue >> 16) % negLength);
                                    irow = idx >= negLength ? -1 : static_cast<int>(negTable[idx]);

                                    if (irow < 0 || irow >= vocabSize)
                                        irow = randomValue % (vocabSize - 1) + 1;

                                    if (irow == nsStarter)
                                        continue;
                                }

                                nSampling_<T>(syn0row, s1n.bufferWithOffset(irow * vectorLength), expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, infVector != nullptr);
                            }
                        }

                        // apply the accumulated gradient to the target word's syn0 row
                        #pragma omp simd
                        for (int e = 0; e < vectorLength; e++)
                            syn0row[e] += neu1e[e];


                        // release the heap buffer if the stack buffer was too small
                        if (vectorLength > 600)
                            delete[] neu1e;
                    }
            }
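
hSoftmax_ (and the analogous nSampling_) are not shown in this example. Below is a rough, self-contained sketch of what a single hierarchical-softmax step of this kind typically does in word2vec-style training; the function name is hypothetical, it calls std::exp directly where the real code reads a precomputed expTable, and it is an illustration of the technique, not the libnd4j implementation:

#include <cmath>

// one hierarchical-softmax step: updates the inner-node vector syn1 and
// accumulates the gradient for the input word vector into neu1e
template <typename T>
void hSoftmaxStepSketch(T* syn0, T* syn1, T* neu1e, T alpha, int vectorLength, int code) {
    // f = sigmoid(syn0 . syn1); the real code looks this up in expTable
    T dot = (T) 0.;
    for (int i = 0; i < vectorLength; i++)
        dot += syn0[i] * syn1[i];
    T f = (T) 1. / ((T) 1. + std::exp(-dot));

    // gradient scaled by the learning rate; code is the 0/1 label from the Huffman tree
    T g = ((T) 1. - (T) code - f) * alpha;

    for (int i = 0; i < vectorLength; i++) {
        neu1e[i] += g * syn1[i];      // accumulated, then applied to syn0row after all rounds
        syn1[i]  += g * syn0[i];
    }
}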