// Drives CBOW training: the master rank (rank 0) prepares the vocabulary,
// the network weights, and — when negative sampling is enabled — the
// unigram table; every rank then joins the parallel CBOW training pass.
void trainCBOW() {
  const bool isMaster = (rank == 0);
  if (isMaster) {
    readVocab();
    InitNet();
    if (negative > 0) {
      InitUnigramTable();
    }
  }
  trainModelParallelCBOW();
}
void LearnEmbeddings(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize, int& Iter, bool& Verbose, TIntFltVH& EmbeddingsHV) { TIntIntH RnmH; TIntIntH RnmBackH; int64 NNodes = 0; //renaming nodes into consecutive numbers for (int i = 0; i < WalksVV.GetXDim(); i++) { for (int64 j = 0; j < WalksVV.GetYDim(); j++) { if ( RnmH.IsKey(WalksVV(i, j)) ) { WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j)); } else { RnmH.AddDat(WalksVV(i,j),NNodes); RnmBackH.AddDat(NNodes,WalksVV(i, j)); WalksVV(i, j) = NNodes++; } } } TIntV Vocab(NNodes); LearnVocab(WalksVV, Vocab); TIntV KTable(NNodes); TFltV UTable(NNodes); TVVec<TFlt, int64> SynNeg; TVVec<TFlt, int64> SynPos; TRnd Rnd(time(NULL)); InitPosEmb(Vocab, Dimensions, Rnd, SynPos); InitNegEmb(Vocab, Dimensions, SynNeg); InitUnigramTable(Vocab, KTable, UTable); TFltV ExpTable(TableSize); double Alpha = StartAlpha; //learning rate #pragma omp parallel for schedule(dynamic) for (int i = 0; i < TableSize; i++ ) { double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision); ExpTable[i] = TMath::Power(TMath::E, Value); } int64 WordCntAll = 0; // op RS 2016/09/26, collapse does not compile on Mac OS X //#pragma omp parallel for schedule(dynamic) collapse(2) for (int j = 0; j < Iter; j++) { #pragma omp parallel for schedule(dynamic) for (int64 i = 0; i < WalksVV.GetXDim(); i++) { TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable, WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos); } } if (Verbose) { printf("\n"); fflush(stdout); } for (int64 i = 0; i < SynPos.GetXDim(); i++) { TFltV CurrV(SynPos.GetYDim()); for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); } EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV); } }