template <typename T>
void Householder<T>::evalHHmatrixData(const NDArray<T>& x, NDArray<T>& tail, T& coeff, T& normX) {

    // input validation
    if(!x.isVector() && !x.isScalar())
        throw "ops::helpers::Householder::evalHHmatrixData method: input array must be vector or scalar!";

    if(!x.isScalar() && x.lengthOf() != tail.lengthOf() + 1)
        throw "ops::helpers::Householder::evalHHmatrixData method: tail vector must be one element shorter than input x vector!";

    normX = x.template reduceNumber<simdOps::Norm2<T>>();
    const T min = DataTypeUtils::min<T>();

    // if x is already (numerically) a multiple of e1, no reflection is needed
    if(normX*normX - x(0)*x(0) <= min) {
        normX = x(0);
        coeff = (T)0.;
        tail  = (T)0.;
    }
    else {
        if(x(0) >= (T)0.)
            normX = -normX;                         // choose opposite sign to lessen roundoff error

        T u0 = x(0) - normX;
        coeff = -u0 / normX;

        if(x.isRowVector())
            tail.assign(x({{}, {1, -1}}) / u0);     // tail = x(1:) / u0
        else
            tail.assign(x({{1, -1}, {}}) / u0);
    }
}
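// Illustration (standalone, not part of this translation unit): a minimal sketch of the same
// reflector arithmetic using plain doubles instead of NDArray, assuming nothing beyond the C++
// standard library. It shows that the coeff and tail produced above, with w = [1, tail], satisfy
// (I - coeff*w*w^T)*x = [normX, 0, ..., 0]. The name evalReflectorSketch is made up for this
// illustration and does not exist in the library; the degenerate branch (x already a multiple
// of e1) is omitted here.
#include <cmath>
#include <cstdio>
#include <vector>

static void evalReflectorSketch(const std::vector<double>& x, std::vector<double>& tail, double& coeff, double& normX) {
    normX = 0.;
    for (double v : x) normX += v * v;
    normX = std::sqrt(normX);
    if (x[0] >= 0.)
        normX = -normX;                                  // opposite sign lessens roundoff error
    const double u0 = x[0] - normX;
    coeff = -u0 / normX;
    tail.resize(x.size() - 1);
    for (size_t i = 1; i < x.size(); ++i)
        tail[i - 1] = x[i] / u0;
}

int main() {
    std::vector<double> x{3., 4., 0.}, tail;
    double coeff, normX;
    evalReflectorSketch(x, tail, coeff, normX);          // normX = -5, coeff = 1.6, tail = [0.5, 0]
    std::vector<double> w{1., tail[0], tail[1]};
    double wx = 0.;
    for (size_t i = 0; i < x.size(); ++i) wx += w[i] * x[i];
    for (size_t i = 0; i < x.size(); ++i)
        std::printf("%g\n", x[i] - coeff * w[i] * wx);   // prints -5, 0, 0, i.e. [normX, 0, 0]
    return 0;
}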
template <typename T>
NDArray<T> Householder<T>::evalHHmatrix(const NDArray<T>& x) {

    // input validation
    if(!x.isVector() && !x.isScalar())
        throw "ops::helpers::Householder::evalHHmatrix method: input array must be vector or scalar!";

    NDArray<T> w((int)x.lengthOf(), 1,  x.ordering(), x.getWorkspace());    // column-vector
    NDArray<T> wT(1, (int)x.lengthOf(), x.ordering(), x.getWorkspace());    // row-vector (transposed w)

    T coeff;
    T normX = x.template reduceNumber<simdOps::Norm2<T>>();
    const T min = DataTypeUtils::min<T>();

    if(normX*normX - x(0)*x(0) <= min) {
        normX = x(0);
        coeff = (T)0.;
        w = (T)0.;
    }
    else {
        if(x(0) >= (T)0.)
            normX = -normX;                         // choose opposite sign to lessen roundoff error

        T u0 = x(0) - normX;
        coeff = -u0 / normX;
        w.assign(x / u0);
    }

    w(0) = (T)1.;
    wT.assign(&w);

    NDArray<T> identity((int)x.lengthOf(), (int)x.lengthOf(), x.ordering(), x.getWorkspace());
    identity.setIdentity();                         // identity matrix

    // Householder matrix: P = I - coeff * w * w^T
    return identity - mmul(w, wT) * coeff;
}
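// Hedged usage sketch, assuming the NDArray<T> constructor and the mmul helper already used
// above; exact signatures may differ across library versions, so this is kept as a comment.
// For a column vector x, the returned matrix P is orthogonal and mmul(P, x) should equal
// [normX, 0, ..., 0]^T up to roundoff:
//
//     NDArray<T> x(3, 1, 'c', nullptr);                  // 3-element column vector
//     x(0) = (T)3.; x(1) = (T)4.; x(2) = (T)0.;
//     NDArray<T> P  = Householder<T>::evalHHmatrix(x);
//     NDArray<T> Px = mmul(P, x);                        // expected: [-5, 0, 0]^T for this x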
template <typename T>
void Householder<T>::evalHHmatrixDataI(const NDArray<T>& x, T& coeff, T& normX) {

    int rows = (int)x.lengthOf() - 1;
    int num = 1;

    if(rows == 0) {     // scalar input: use a one-element tail and write it back starting at element 0
        rows = 1;
        num = 0;
    }

    NDArray<T> tail(rows, 1, x.ordering(), x.getWorkspace());
    evalHHmatrixData(x, tail, coeff, normX);

    // in-place variant: write the tail back into x, starting at element `num`
    if(x.isRowVector()) {
        NDArray<T>* temp = x.subarray({{}, {num, x.sizeAt(1)}});
        temp->assign(tail);
        delete temp;
    }
    else {
        NDArray<T>* temp = x.subarray({{num, x.sizeAt(0)}, {}});
        temp->assign(tail);
        delete temp;
    }
}
template <typename T>
void skipgramBatchExec_(NDArray &s0, NDArray &s1, NDArray &s1n, void *vexpTable, void *vnegTable, void *vinfVector,
                        NDArray &targets, NDArray &negStarters, NDArray &indices, NDArray &codes, NDArray &lr, NDArray &nextRandom,
                        const int nsRounds, const int vocabSize, const int vectorLength, const int expLength, const int negLength,
                        const bool preciseMode, const int numThreads) {
    //auto syn0 = reinterpret_cast<T*>(vsyn0);
    //auto syn1 = reinterpret_cast<T*>(vsyn1);
    //auto syn1Neg = reinterpret_cast<T*>(vsyn1Neg);
    const auto expTable  = reinterpret_cast<T*>(vexpTable);
    const auto negTable  = reinterpret_cast<T*>(vnegTable);
    const auto infVector = reinterpret_cast<T*>(vinfVector);

    // stack buffer for the accumulated gradient; the heap is used only for longer vectors
    T sneu1e[600];

    //const auto numThreads = omp_get_max_threads();
    const auto idxShift = indices.isEmpty() ? 0 : indices.sizeAt(1);
    const auto hsRounds = codes.isEmpty() ? 0 : codes.sizeAt(1);

    // regular mode provides 0 guarantees for reproducibility
    auto numTargets = targets.lengthOf();
    auto bTarget  = targets.bufferAsT<int>();
    auto bIndices = indices.bufferAsT<int>();
    auto bCodes   = codes.bufferAsT<int8_t>();

    #pragma omp parallel for num_threads(numThreads) private(sneu1e) default(shared) schedule(static)
    for (int t = 0; t < numTargets; t++) {
        T* neu1e = vectorLength <= 600 ? sneu1e : new T[vectorLength];
        memset(neu1e, 0, vectorLength * sizeof(T));

        auto target = bTarget[t];
        auto alpha = lr.e<double>(t);
        unsigned long long randomValue = nextRandom.e<Nd4jLong>(t);

        auto syn0row = reinterpret_cast<T*>(s0.bufferWithOffset(target * vectorLength));

        // hierarchical softmax rounds
        if (hsRounds > 0) {
            int irow = 0;
            auto cShift = t * idxShift;

            for (int e = 0; e < hsRounds; e++) {
                irow = bIndices[e + cShift];
                if (irow < 0 || irow >= vocabSize)
                    continue;

                auto syn1row = s1.bufferWithOffset(irow * vectorLength);
                auto code = bCodes[e + cShift];

                //nd4j_printf("syn0: [%i]; syn1: [%i]; code: [%i]\n", target, irow, code);
                hSoftmax_<T>(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, code, expLength, false);
            }
        }

        // negative sampling rounds
        if (nsRounds > 0) {
            int irow = negStarters.e<int>(t);
            int nsStarter = irow;

            for (int r = 0; r < nsRounds + 1; r++) {
                if (r == 0) {
                    // target is known in advance
                }
                else {
                    randomValue = randomValue * (unsigned long long) 25214903917 + 11;
                    auto idx = nd4j::math::nd4j_abs<Nd4jLong>((randomValue >> 16) % negLength);
                    irow = idx >= negLength ? -1 : static_cast<int>(negTable[idx]);

                    if (irow < 0 || irow >= vocabSize)
                        irow = randomValue % (vocabSize - 1) + 1;

                    if (irow == nsStarter)
                        continue;
                }

                nSampling_<T>(syn0row, s1n.bufferWithOffset(irow * vectorLength), expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, infVector != nullptr);
            }
        }

        // apply the accumulated gradient to the target row
        #pragma omp simd
        for (int e = 0; e < vectorLength; e++)
            syn0row[e] += neu1e[e];

        // optionally release temp arrays
        if (vectorLength > 600)
            delete[] neu1e;
    }
}
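// Standalone illustration (not part of this translation unit) of the negative-sampling index draw
// used in the loop above: a Java-style linear congruential generator advances randomValue, the
// bits from position 16 upward index into the pre-built sampling table, and out-of-range rows
// fall back to a uniform draw over [1, vocabSize-1]. The table contents and seed below are made
// up for the example; the real negTable is supplied by the caller.
#include <cstdio>

int main() {
    const int vocabSize = 10;
    const int negLength = 16;
    const int negTable[negLength] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 9};

    unsigned long long randomValue = 42ULL;                    // per-target seed (nextRandom in the real code)
    for (int r = 0; r < 5; r++) {
        randomValue = randomValue * 25214903917ULL + 11ULL;    // same LCG constants as above
        auto idx = (randomValue >> 16) % negLength;
        int irow = negTable[idx];
        if (irow < 0 || irow >= vocabSize)
            irow = (int)(randomValue % (vocabSize - 1)) + 1;   // fallback: uniform over [1, vocabSize-1]
        std::printf("negative sample %d -> row %d\n", r, irow);
    }
    return 0;
}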