// Builds a JavaScript array holding one JS string per element of StrV.
// The array is created inside an escapable handle scope on the current
// isolate and escaped to the caller's scope on return.
v8::Local<v8::Value> TNodeJsUtil::GetStrArr(const TStrV& StrV) {
    v8::Isolate* Isolate = v8::Isolate::GetCurrent();
    v8::EscapableHandleScope HandleScope(Isolate);

    const int Len = StrV.Len();
    v8::Local<v8::Array> ResArr = v8::Array::New(Isolate, Len);
    for (int ElN = 0; ElN < Len; ElN++) {
        const char* ElCStr = StrV[ElN].CStr();
        ResArr->Set(ElN, v8::String::NewFromUtf8(Isolate, ElCStr));
    }
    return HandleScope.Escape(ResArr);
}
int TTransCorpus::CountWords(const TIntStrH& StrH) { int Words = 0, KeyId = StrH.FFirstKeyId(); while (StrH.FNextKeyId(KeyId)) { const TStr& Str = StrH[KeyId]; TStrV WordV; Str.SplitOnWs(WordV); Words += WordV.Len(); } return Words; }
void TSwSet::LoadFromFile(const TStr& FNm) { TStr FileStr = TStr::LoadTxt(FNm); FileStr.DelChAll('\r'); TStrV WordV; FileStr.SplitOnAllCh('\n', WordV); for (int WordN = 0; WordN < WordV.Len(); WordN++) { const TStr& WordStr = WordV[WordN]; if (!IsIn(WordStr)) { AddWord(WordStr); } } }
void TFile::DelWc(const TStr& WcStr, const bool& RecurseDirP){ // collect file-names TStrV FNmV; TFFile FFile(WcStr, RecurseDirP); TStr FNm; while (FFile.Next(FNm)){ FNmV.Add(FNm);} // delete files for (int FNmN=0; FNmN<FNmV.Len(); FNmN++){ Del(FNmV[FNmN], false);} }
// Fills VoidItemIdV with the distinct "next item" ids referenced by the
// items in this base for which IsItem() returns false.
void TAmazonItemBs::GetVoidItemIdV(TStrV& VoidItemIdV) const {
  VoidItemIdV.Clr();
  for (int ItemN=0; ItemN<GetItems(); ItemN++){
    PAmazonItem CurItem=GetItem(ItemN);
    for (int NextN=0; NextN<CurItem->GetNextItemIds(); NextN++){
      TStr TargetId=CurItem->GetNextItemId(NextN);
      // referenced item exists in the base - nothing to report
      if (IsItem(TargetId)){continue;}
      VoidItemIdV.AddUnique(TargetId);
    }
  }
}
// Returns the entries of EdgeAttrV whose column type is STR, in their
// original order. The result vector is constructed with
// (StrColMaps.Len(), 0), i.e. it starts with zero elements.
TStrV TTable::GetEdgeStrAttrV() const {
  TStrV ResV(StrColMaps.Len(), 0);
  const int Attrs = EdgeAttrV.Len();
  for (int AttrN = 0; AttrN < Attrs; AttrN++) {
    TStr AttrNm = EdgeAttrV[AttrN];
    const bool IsStrP = (GetColType(AttrNm) == STR);
    if (IsStrP) { ResV.Add(AttrNm); }
  }
  return ResV;
}
// Returns the entries of DstNodeAttrV whose column type is STR, in their
// original order. The result vector is constructed with
// (StrColMaps.Len(), 0), i.e. it starts with zero elements.
TStrV TTable::GetDstNodeStrAttrV() const {
  TStrV ResV(StrColMaps.Len(), 0);
  const int Attrs = DstNodeAttrV.Len();
  for (int AttrN = 0; AttrN < Attrs; AttrN++) {
    TStr AttrNm = DstNodeAttrV[AttrN];
    const bool IsStrP = (GetColType(AttrNm) == STR);
    if (IsStrP) { ResV.Add(AttrNm); }
  }
  return ResV;
}
// Returns the entries of EdgeAttrV whose column type is FLT, in their
// original order. The result vector is constructed with
// (FltCols.Len(), 0), i.e. it starts with zero elements.
TStrV TTable::GetEdgeFltAttrV() const {
  TStrV ResV(FltCols.Len(), 0);
  const int Attrs = EdgeAttrV.Len();
  for (int AttrN = 0; AttrN < Attrs; AttrN++) {
    TStr AttrNm = EdgeAttrV[AttrN];
    const bool IsFltP = (GetColType(AttrNm) == FLT);
    if (IsFltP) { ResV.Add(AttrNm); }
  }
  return ResV;
}
// Returns the entries of DstNodeAttrV whose column type is FLT, in their
// original order. The result vector is constructed with
// (FltCols.Len(), 0), i.e. it starts with zero elements.
TStrV TTable::GetDstNodeFltAttrV() const {
  TStrV ResV(FltCols.Len(), 0);
  const int Attrs = DstNodeAttrV.Len();
  for (int AttrN = 0; AttrN < Attrs; AttrN++) {
    TStr AttrNm = DstNodeAttrV[AttrN];
    const bool IsFltP = (GetColType(AttrNm) == FLT);
    if (IsFltP) { ResV.Add(AttrNm); }
  }
  return ResV;
}
// Returns the entries of EdgeAttrV whose column type is INT, in their
// original order. The result vector is constructed with
// (IntCols.Len(), 0), i.e. it starts with zero elements.
TStrV TTable::GetEdgeIntAttrV() const {
  TStrV ResV(IntCols.Len(), 0);
  const int Attrs = EdgeAttrV.Len();
  for (int AttrN = 0; AttrN < Attrs; AttrN++) {
    TStr AttrNm = EdgeAttrV[AttrN];
    const bool IsIntP = (GetColType(AttrNm) == INT);
    if (IsIntP) { ResV.Add(AttrNm); }
  }
  return ResV;
}
// Returns the entries of DstNodeAttrV whose column type is INT, in their
// original order. The result vector is constructed with
// (IntCols.Len(), 0), i.e. it starts with zero elements.
TStrV TTable::GetDstNodeIntAttrV() const {
  TStrV ResV(IntCols.Len(), 0);
  const int Attrs = DstNodeAttrV.Len();
  for (int AttrN = 0; AttrN < Attrs; AttrN++) {
    TStr AttrNm = DstNodeAttrV[AttrN];
    const bool IsIntP = (GetColType(AttrNm) == INT);
    if (IsIntP) { ResV.Add(AttrNm); }
  }
  return ResV;
}
// return the list of folders containing backups for a given profile name // folders are sorted from the oldest to the newest void TFolderBackup::GetBackupFolders(const TStr& ProfileName, TStrV& FolderNmV) const { FolderNmV.Clr(); if (ProfileH.IsKey(ProfileName)) { TBackupProfile Profile = ProfileH.GetDat(ProfileName); const TVec<TBackupLogInfo>& LogV = Profile.GetLogs(); for (int N = 0; N < LogV.Len(); N++) FolderNmV.Add(LogV[N].GetFolderName()); } }
void TNGramBs::GetNGramStrV( const TStr& HtmlStr, TStrV& NGramStrV, TIntPrV& NGramBEChXPrV) const { TIntV NGramIdV; NGramStrV.Clr(); NGramBEChXPrV.Clr(); TNGramBs::GetNGramIdV(HtmlStr, NGramIdV, NGramBEChXPrV); NGramStrV.Gen(NGramIdV.Len(), 0); for (int NGramIdN=0; NGramIdN<NGramIdV.Len(); NGramIdN++){ TStr NGramStr=GetNGramStr(NGramIdV[NGramIdN]); NGramStrV.Add(NGramStr); } }
void TExpHelp::GetObjHdNmV(const TStr& CatNm, TStrV& ObjHdNmV) const { ObjHdNmV.Clr(); for (int ObjN=0; ObjN<ObjV.Len(); ObjN++){ TStr ObjCatNm=ObjV[ObjN]->GetCatNm(); TStr ObjHdNm=ObjV[ObjN]->GetHdItem()->GetNm(); if ((CatNm.Empty())||(CatNm=="All")||(CatNm==ObjCatNm)){ ObjHdNmV.AddUnique(ObjHdNm);} } ObjHdNmV.Sort(); }
// Builds a bag-of-words document base from the Reuters-21578 ".SGM"
// files found in FPath (no recursion).
//   FPath          directory with the .SGM files
//   MxDocs         document limit; -1 disables it. The counter is
//                  incremented before the check, so loading stops as soon
//                  as the counter reaches MxDocs
//   SwSetTypeNm    stop-word set type passed to TSwSet::GetSwSet
//   StemmerTypeNm  stemmer type passed to TStemmer::GetStemmer
//   MxNGramLen,
//   MnNGramFq      n-gram settings; an n-gram base is built unless both
//                  are equal to 1
//   SaveDocP       forwarded to AddHtmlDoc
//   Notify         not used by this function
// Documents with TOPICS=="YES" are registered as train or test documents
// according to their LEWISSPLIT attribute.
PBowDocBs TBowFl::LoadReuters21578Txt(
 const TStr& FPath, const int& MxDocs,
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq,
 const bool& SaveDocP, const PNotify& Notify){
  // prepare stop-words
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  // prepare stemmer
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // create ngrams
  PNGramBs NGramBs;
  if (!((MxNGramLen==1)&&(MnNGramFq==1))){
    NGramBs=TNGramBs::GetNGramBsFromReuters21578(
     FPath, MxDocs, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // create document-base
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  // traverse directory
  TFFile FFile(FPath, ".SGM", false); TStr FNm; int Docs=0;
  while (FFile.Next(FNm)){
    printf("Processing file '%s'\n", FNm.CStr());
    TXmlDocV LDocV; TXmlDoc::LoadTxt(FNm, LDocV);
    for (int LDocN=0; LDocN<LDocV.Len(); LDocN++){
      Docs++; if (Docs%100==0){printf("%d\r", Docs);}
      if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
      // get document-name
      PXmlDoc Doc=LDocV[LDocN];
      PXmlTok DocTok=Doc->GetTok();
      TStr DocNm=DocTok->GetArgVal("NEWID");
      // get document-categories
      TStrV CatNmV;
      TXmlTokV TopicsTokV; Doc->GetTagTokV("REUTERS|TOPICS|D", TopicsTokV);
      for (int TopicsTokN=0; TopicsTokN<TopicsTokV.Len(); TopicsTokN++){
        TStr CatNm=TopicsTokV[TopicsTokN]->GetTokStr(false);
        CatNmV.Add(CatNm);
      }
      // get document-contents
      PXmlTok DocStrTok=Doc->GetTagTok("REUTERS|TEXT");
      TStr DocStr=DocStrTok->GetTokStr(false);
      // add document to bow
      int DId=BowDocBs->AddHtmlDoc(DocNm, CatNmV, DocStr, SaveDocP);
      // train & test data (only documents with assigned topics);
      // the split attribute is read once and compared against both values
      if (DocTok->GetArgVal("TOPICS")=="YES"){
        const TStr SplitNm=DocTok->GetArgVal("LEWISSPLIT");
        if (SplitNm=="TRAIN"){
          BowDocBs->AddTrainDId(DId);
        } else if (SplitNm=="TEST"){
          BowDocBs->AddTestDId(DId);
        }
      }
    }
    // propagate the document limit to the file loop
    if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
  }
  // return results
  BowDocBs->AssertOk();
  return BowDocBs;
}
void TFFile::GetFNmV( const TStr& FPath, const TStrV& FExtV, const bool& RecurseP, TStrV& FNmV){ // prepare file-directory traversal TStrV FPathV; FPathV.Add(FPath); TFFile FFile(FPathV, FExtV, "", RecurseP); TStr FNm; // traverse directory FNmV.Clr(); while (FFile.Next(FNm)){ FNmV.Add(FNm); } }
// Restores profile ProfileName from the last folder returned by
// GetBackupFolders(). Returns true when a restore was started, false when
// the profile is unknown or has no backup folders.
bool TFolderBackup::RestoreLatest(const TStr& ProfileName, const TBackupProfile::ERestoringMode& RestoringMode) const {
    if (!ProfileH.IsKey(ProfileName)) { return false; }
    TStrV BackupFolderV;
    GetBackupFolders(ProfileName, BackupFolderV);
    const int Folders = BackupFolderV.Len();
    if (Folders <= 0) { return false; }
    const TStr& LatestFolder = BackupFolderV[Folders - 1];
    ProfileH.GetDat(ProfileName).Restore(LatestFolder, RestoringMode, ReportP);
    return true;
}
// Fills VarNmV with the names of all entries in the NULL-terminated
// _environ array; each entry is split at '=' and the part before it is
// taken as the name.
void TEnv::GetVarNmV(TStrV& VarNmV) {
  VarNmV.Clr();
  for (int VarN=0; _environ[VarN]!=NULL; VarN++){
    TStr NmValStr=_environ[VarN];
    TStr Nm; TStr Val;
    NmValStr.SplitOnCh(Nm, '=', Val);
    VarNmV.Add(Nm);
  }
}
void TFtrGenBs::Update(const TStrV& FtrValV) { EAssert(State == fgsUpdate); EAssert(FtrValV.Len() == FtrGenV.Len()); try { for (int FtrValN = 0; FtrValN < FtrValV.Len(); FtrValN++) { FtrGenV[FtrValN]->Update(FtrValV[FtrValN]); } } catch (PExcept Ex) { TExcept::Throw(TStr::Fmt("Error feature generation: '%s'!", Ex->GetMsgStr().CStr())); } }
void TFtrGenSparseNumeric::Add(const TStr& Str, TIntFltKdV& SpV, int& Offset) const { TStrV EltV; Str.SplitOnAllCh(';', EltV); TIntH UsedIdH; for (int EltN = 0; EltN < EltV.Len(); EltN++) { int Id; TStr Val; Split(EltV[EltN], Id, Val); EAssertR(!UsedIdH.IsKey(Id), "Field ID repeated in '" + Str + "'!"); int TmpOffset = Offset + Id; FtrGen->Add(Val, SpV, TmpOffset); UsedIdH.AddKey(Id); } Offset += GetVals(); }
void TWebPgFetchEvent::ChangeLastUrlToLc(const PHttpResp& HttpResp){ static TStr MsNm="Microsoft"; static TStr HttpsNm="HTTPS"; TStr SrvNm=HttpResp->GetSrvNm(); if ((SrvNm.StartsWith(MsNm))||(SrvNm.StartsWith(HttpsNm))){ if (!UrlStrV.Last().IsLc()){ PUrl Url=TUrl::New(UrlStrV.Last()); Url->ToLcPath(); UrlStrV.Last()=Url->GetUrlStr(); } } }
// copy files for a particular folder info void TBackupProfile::CopyFolder(const TStr& BaseTargetFolder, const TStr& SourceFolder, const TStrV& Extensions, const TStrV& SkipIfContainingV, const bool& IncludeSubfolders, const bool& ReportP, TStr& ErrMsg) { try { // get the name of the source folder TStrV PathV; TDir::SplitPath(SourceFolder, PathV); EAssert(PathV.Len() > 0); // create the folder in the base target folder TStr TargetFolder = BaseTargetFolder + PathV[PathV.Len() - 1] + "/"; if (!TDir::Exists(TargetFolder)) TDir::GenDir(TargetFolder); // find files to be copied TStrV FileV; TFFile::GetFNmV(SourceFolder, Extensions, false, FileV); TStrV FolderV; // copy them for (int N = 0; N < FileV.Len(); N++) { // we found a file if (TFile::Exists(FileV[N])) { const TStr FileName = TDir::GetFileName(FileV[N]); // is this a file that we wish to ignore? bool ShouldCopy = true; for (int S = 0; S < SkipIfContainingV.Len(); S++) { if (FileName.SearchStr(SkipIfContainingV[S]) >= 0) ShouldCopy = false; } if (!ShouldCopy) continue; const TStr TargetFNm = TargetFolder + FileName; if (ReportP) TNotify::StdNotify->OnStatusFmt("Copying file: %s\r", FileName.CStr()); TFile::Copy(FileV[N], TargetFNm); } // we found a folder else { FolderV.Add(FileV[N]); } } if (IncludeSubfolders) { for (int N = 0; N < FolderV.Len(); N++) CopyFolder(TargetFolder, FolderV[N], Extensions, SkipIfContainingV, IncludeSubfolders, ReportP, ErrMsg); } } catch (PExcept E) { if (ErrMsg != "") ErrMsg += "\n"; ErrMsg += "Exception while copying from " + SourceFolder + ": " + E->GetMsgStr(); } catch (...) { if (ErrMsg != "") ErrMsg += "\n"; ErrMsg += "Exception while copying from " + SourceFolder + ": " + "Unrecognized exception occured."; } }
void TFtrGenToken::Update(const TStr& Val) { TStrV TokenStrV; GetTokenV(Val, TokenStrV); TStrH TokenStrH; for (int TokenStrN = 0; TokenStrN < TokenStrV.Len(); TokenStrN++) { const TStr& TokenStr = TokenStrV[TokenStrN]; TokenStrH.AddKey(TokenStr); } int KeyId = TokenStrH.FFirstKeyId(); while (TokenStrH.FNextKeyId(KeyId)) { const TStr& TokenStr = TokenStrH.GetKey(KeyId); TokenH.AddDat(TokenStr)++; } Docs++; }
void TGreedyAlg::addCascade(const TStr& cascadeStr) { TStrV NIdV; cascadeStr.SplitOnAllCh(';', NIdV); TCascade C; for (int i = 0; i < NIdV.Len(); i++) { TStr NId, Tm; NIdV[i].SplitOnCh(NId, ',', Tm); IAssert( IsNodeNm(NId.GetInt()) ); GetNodeInfo(NId.GetInt()).Vol = GetNodeInfo(NId.GetInt()).Vol + 1; C.Add(NId.GetInt(), Tm.GetFlt()); } C.Sort(); cascadeV.Add(C); }
void TEvalScore::Parse(const TStr& Str, TIntV& WIdV) { TStrV TokenV; Tokenize(Str, TokenV); WIdV.Clr(); for (int WdN = 0; WdN < TokenV.Len(); WdN++) { // get the word string TStr WdStr = TokenV[WdN]; // get id of the word int WId = WordH.GetKeyId(WdStr); // word does not exist yet, add it to the hash table if (WId == -1) { WId = WordH.AddKey(WdStr); } // add word to the parsed sentence WIdV.Add(WId); } }
// Builds an n-gram base from a line-document file (one document per line).
//   LnDocFNm    input file
//   NamedP      when true, every line starts with a document name that is
//               terminated by a space
//   MxDocs      document limit; -1 disables it. The counter is incremented
//               before the check, so a pass stops as soon as the counter
//               reaches MxDocs
//   MxNGramLen,
//   MnNGramFq,
//   SwSet,
//   Stemmer     forwarded to TNGramBs::New
// After the optional name a line may carry categories (tokens starting
// with '!'), followed by the document text. The file is re-read once per
// pass until the n-gram base reports IsFinished().
PNGramBs TNGramBs::GetNGramBsFromLnDoc(
 const TStr& LnDocFNm, const bool& NamedP, const int& MxDocs,
 const int& MxNGramLen, const int& MnNGramFq,
 const PSwSet& SwSet, const PStemmer& Stemmer){
  // create n-gram-base
  PNGramBs NGramBs=TNGramBs::New(MxNGramLen, MnNGramFq, SwSet, Stemmer);
  // iterations over document-set
  while (!NGramBs->IsFinished()){
    // open line-doc file
    TFIn FIn(LnDocFNm); char Ch=' '; int Docs=0;
    while (!FIn.Eof()){
      // unnamed documents: step over the line terminator left in Ch by the
      // previous document. Named documents must not do this, because the
      // name reader below fetches the next character itself and would
      // otherwise overwrite (lose) the first character of the name.
      if ((!NamedP)&&((Ch=='\r')||(Ch=='\n'))){Ch=FIn.GetCh(); continue;}
      Docs++; if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
      // document name
      TChA DocNm;
      if (NamedP){
        Ch=FIn.GetCh();
        while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){
          DocNm+=Ch; Ch=FIn.GetCh();}
        DocNm.Trunc();
        // empty name (e.g. blank line or second half of "\r\n"): not a document
        if (DocNm.Empty()){Docs--; continue;}
      }
      // categories (collected but not used for n-gram counting)
      TStrV CatNmV;
      forever {
        while ((!FIn.Eof())&&(Ch==' ')){Ch=FIn.GetCh();}
        // the end-of-input test prevents an endless loop when the file
        // ends right after a '!'
        if ((Ch=='!')&&(!FIn.Eof())){
          TChA CatNm;
          while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){
            CatNm+=Ch; Ch=FIn.GetCh();}
          if (!CatNm.Empty()){CatNmV.Add(CatNm);}
        } else {
          break;
        }
      }
      // document text
      TChA DocChA;
      while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')){
        DocChA+=Ch; Ch=FIn.GetCh();}
      // extract words & update ngram-base
      _UpdateNGramBsFromHtmlStr(NGramBs, DocChA, SwSet, Stemmer);
    }
    NGramBs->ConcPass();
  }
  // return
  return NGramBs;
}
void TCpDoc::SaveLnDocToCpd( const TStr& LnDocFNm, const TStr& OutCpdFNm, const bool& NamedP, const int& MxDocs){ printf("Saving Line-Document '%s' to '%s' ...\n", LnDocFNm.CStr(), OutCpdFNm.CStr()); // create output file PSOut SOut=TFOut::New(OutCpdFNm); // open line-doc file TFIn FIn(LnDocFNm); char Ch=' '; int Docs=0; while (!FIn.Eof()){ Docs++; if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;} printf("%d\r", Docs); // document name TChA DocNm; if (NamedP){ Ch=FIn.GetCh(); while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){ DocNm+=Ch; Ch=FIn.GetCh();} DocNm.Trunc(); if (DocNm.Empty()){Docs--; continue;} } // categories TStrV CatNmV; forever { while ((!FIn.Eof())&&(Ch==' ')){Ch=FIn.GetCh();} if (Ch=='!'){ if (!FIn.Eof()){Ch=FIn.GetCh();} TChA CatNm; while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){ CatNm+=Ch; Ch=FIn.GetCh();} if (!CatNm.Empty()){CatNmV.Add(CatNm);} } else { break; } } // document text TChA DocChA; while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')){ DocChA+=Ch; Ch=FIn.GetCh();} // skip empty documents (empty lines) if (DocNm.Empty()&&DocChA.Empty()){ continue;} // create & save cpd document PCpDoc CpDoc=TCpDoc::New(); CpDoc->DocNm=DocNm; CpDoc->ParStrV.Add(DocChA, 1); for (int CatNmN=0; CatNmN<CatNmV.Len(); CatNmN++){ CpDoc->TopCdNmV.Add(CatNmV[CatNmN]);} CpDoc->Save(*SOut); } printf("\nDone.\n"); }
// Parses CascStr, a ','-separated list of alternating node ids and times
// ("id,time,id,time,..."), into a cascade built with (alpha, Model).
// The Vol counter of every listed node is increased by one; the cascade
// is sorted and appended to CascV.
// Tokens are consumed strictly in pairs: a trailing id without a time is
// ignored instead of reading past the end of the token vector.
void TNetInfBs::AddCasc(const TStr& CascStr, const int& Model, const double& alpha) {
    TStrV NIdV; CascStr.SplitOnAllCh(',', NIdV);
    TCascade C(alpha, Model);
    // i+1 must be a valid index, hence the "i + 1 < Len" bound
    for (int i = 0; i + 1 < NIdV.Len(); i += 2) {
        const int NId = NIdV[i].GetInt();
        const double Tm = NIdV[i+1].GetFlt();
        GetNodeInfo(NId).Vol = GetNodeInfo(NId).Vol + 1;
        C.Add(NId, Tm);
    }
    C.Sort();
    CascV.Add(C);
}
///////////////////////////////////////////////// // EuPartner TStr TCordisEuPart::ExtrCountry(const TStr& AddrStr){ TStr CountryStr; TStrV LnV; AddrStr.SplitOnStr("<br>", LnV); if (LnV.Len()>0){ CountryStr=LnV.Last(); if (CountryStr.Empty()&&(LnV.Len()>1)){ CountryStr=LnV[LnV.Len()-2];} CountryStr.DelChAll('\r'); CountryStr.DelChAll('\n'); } if (CountryStr.Empty()){ printf("Country Field Not Found!\n");} return CountryStr; }
// Writes all key/value pairs of OutputValues, one "key<TAB>value" line
// each, to <Directory>/text/statistics/statistics_<date>.txt, where the
// date part comes from Date.GetDtYmdStr(). Does nothing when logging is
// disabled. If the file cannot be opened, a message is printed to stderr
// and nothing is written.
void LogOutput::WriteClusteringStatisticsToFile(TSecTm& Date) {
  if (!ShouldLog) return;
  TStr FileName = Directory + "/text/statistics/statistics_" + Date.GetDtYmdStr() + ".txt";
  FILE *F = fopen(FileName.CStr(), "w");
  // fopen returns NULL on failure (e.g. missing directory); passing NULL
  // to fprintf/fclose is undefined behavior
  if (F == NULL) {
    fprintf(stderr, "Could not open '%s' for writing\n", FileName.CStr());
    return;
  }

  TStrV Keys;
  OutputValues.GetKeyV(Keys);

  for (int i = 0; i < Keys.Len(); ++i) {
    fprintf(F, "%s\t%s\n", Keys[i].CStr(), OutputValues.GetDat(Keys[i]).CStr());
  }
  fclose(F);
}