int main(){ TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); // create table PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols); //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt"); T->Unique("Animal"); TTable Ts = *T; // did we fix problem with copy-c'tor ? //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols); //Ts->Unique(AnimalUnique); // test Select // create predicate tree: find all animals that are big and african or medium and Australian TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa"); TPredicate::TPredicateNode N1(A1); // Location == "Africa" TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big"); TPredicate::TPredicateNode N2(A2); // Size == "big" TPredicate::TPredicateNode N3(AND); N3.AddLeftChild(&N1); N3.AddRightChild(&N2); TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia"); TPredicate::TPredicateNode N4(A4); TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium"); TPredicate::TPredicateNode N5(A5); TPredicate::TPredicateNode N6(AND); N6.AddLeftChild(&N4); N6.AddRightChild(&N5); TPredicate::TPredicateNode N7(OR); N7.AddLeftChild(&N3); N7.AddRightChild(&N6); TPredicate Pred(&N7); TIntV SelectedRows; Ts.Select(Pred, SelectedRows); TStrV GroupBy; GroupBy.Add("Location"); T->Group(GroupBy, "LocationGroup"); GroupBy.Add("Size"); T->Group(GroupBy, "LocationSizeGroup"); T->Count("LocationCount", "Location"); PTable Tj = T->Join("Location", Ts, "Location"); TStrV UniqueAnimals; UniqueAnimals.Add("Animals_1.Animal"); UniqueAnimals.Add("Animals_2.Animal"); 
Tj->Unique(UniqueAnimals, false); //print table T->SaveSS("tests/animals_out_T.txt"); Ts.SaveSS("tests/animals_out_Ts.txt"); Tj->SaveSS("tests/animals_out_Tj.txt"); return 0; }
// Load CIA World-Factbook country pages from FPath into a bag-of-words
// document base. At most MxDocs documents are added (the loop stops when
// CountryN==MxDocs); n-grams up to MxNGramLen with frequency >= MnNGramFq
// are extracted first, unless both limits are 1.
PBowDocBs TBowFl::LoadCiaWFBTxt(
 const TStr& FPath, const int& MxDocs, const TStr& SwSetTypeNm,
 const TStr& StemmerTypeNm, const int& MxNGramLen, const int& MnNGramFq){
  // load the raw CIA WFB html base
  PCiaWFBBs CiaWFBBs=TCiaWFBBs::LoadHtml(FPath);
  // stop-words & stemmer
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // build the n-gram base only when n-grams are actually requested
  PNGramBs NGramBs;
  if ((MxNGramLen!=1)||(MnNGramFq!=1)){
    TStrV HtmlStrV;
    const int Countries=CiaWFBBs->GetCountries();
    for (int CountryN=0; CountryN<Countries; CountryN++){
      HtmlStrV.Add(CiaWFBBs->GetCountry(CountryN)->GetDescStr());
    }
    NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
     HtmlStrV, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // fill the document base: one document per country
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  for (int CountryN=0; CountryN<CiaWFBBs->GetCountries(); CountryN++){
    if (CountryN==MxDocs){break;} // honor the document cap
    PCiaWFBCountry Country=CiaWFBBs->GetCountry(CountryN);
    TStr CountryNm=Country->GetCountryNm();
    TStr DescStr=Country->GetDescStr();
    // category = truncated "Map references" field of the country
    TStrV CatNmV;
    CatNmV.Add(Country->GetFldVal("Map references").GetTrunc());
    BowDocBs->AddHtmlDoc(CountryNm, CatNmV, DescStr, true);
  }
  BowDocBs->AssertOk();
  return BowDocBs;
}
/////////////////////////////// // Tokenizer-Utils void TTokenizerUtil::Sentencize(const PSIn& SIn, TStrV& Sentences, const bool& SplitNewLineP) { TChA SentenceBuf; int c; while (!SIn->Eof()) { c = SIn->GetCh(); switch (c) { case '\r': case '\n': { if (!SplitNewLineP) { SentenceBuf += ' '; break; } } case '"' : case '.' : case '!' : case ':' : case ';' : case '?' : case '\t': { if (SentenceBuf.Len() > 2) { Sentences.Add(SentenceBuf); SentenceBuf.Clr(); } break; } default: SentenceBuf += c; break; } } if (SentenceBuf.Len() > 0) { Sentences.Add(SentenceBuf); } }
// Directory is timestamped in the web directory by default. void LogOutput::SetupNewOutputDirectory(TStr Directory) { if (!ShouldLog) return; // CASE 1: We need to set up a new directory if (Directory == TStr("")) { TSecTm Tm = TSecTm::GetCurTm(); TStr TimeStamp = Tm.GetDtYmdStr() + "_" + Tm.GetTmStr(); this->Directory = WebDirectory + TimeStamp; Err("New directory set up: %s\n", this->Directory.CStr()); } else { // CASE 2: The user has specified a directory - we just have to store it this->Directory = Directory; Err("Using existing directory: %s\n", this->Directory.CStr()); } // Copy directories over. TStrV Commands; Commands.Add("mkdir -p " + this->Directory); Commands.Add("cp -r resources/output/text " + this->Directory); Commands.Add("cp -r resources/output/web " + this->Directory); for (int i = 0; i < Commands.Len(); i++) { system(Commands[i].CStr()); } Err("Necessary files copied over to %s\n", this->Directory.CStr()); }
// Return the parallel lists of supported stemmer identifiers
// (StemmerTypeNmV) and their display names (StemmerTypeDNmV).
void TStemmer::GetStemmerTypeNmV(TStrV& StemmerTypeNmV, TStrV& StemmerTypeDNmV) {
  StemmerTypeNmV.Clr(); StemmerTypeDNmV.Clr();
  // identifier / display-name pairs, kept in matching positions
  StemmerTypeNmV.Add("none");   StemmerTypeDNmV.Add("None");
  StemmerTypeNmV.Add("porter"); StemmerTypeDNmV.Add("English-Porter");
}
// Test drawing of SNAP graphs using GraphViz with color labeling TEST(GVizTest, DrawGViz) { PUNGraph UNGraph1; UNGraph1 = LoadEdgeList<PUNGraph>(TStr::Fmt("%s/sample_ungraph1.txt", DIRNAME)); PNGraph NGraph1; NGraph1 = LoadEdgeList<PNGraph>(TStr::Fmt("%s/sample_ngraph1.txt", DIRNAME)); mkdir(DIRNAME, S_IRWXU | S_IRWXG | S_IRWXO); TStrV LNames; // gvlDot, gvlNeato, gvlTwopi, gvlCirco LNames.Add("Dot"); LNames.Add("Neato"); LNames.Add("Twopi"); LNames.Add("Circo"); TStrV Exts; Exts.Add("ps"); //Exts.Add("gif"); Exts.Add("png"); for (int i = 0; i < LNames.Len(); i++) { for (int e = 0; e < Exts.Len(); e++) { for (int d = 0; d < 2; d++) { // Baseline file has already been created (use as benchmark) TStr FNameBase = TStr::Fmt("%s/base_%s_%s.%s", DIRNAME, d ? "ngraph" : "ungraph" , LNames[i].CStr(), Exts[e].CStr()); TStr FNameTest = TStr::Fmt("%s/test_%s_%s.%s", DIRNAME, d ? "ngraph" : "ungraph" , LNames[i].CStr(), Exts[e].CStr()); // Remove test graph if it already exists remove(FNameTest.CStr()); EXPECT_FALSE(fileExists(FNameTest.CStr())); // Draw new graph and check if created and equal to baseline (for ps only) if (d) { TSnap::DrawGViz(NGraph1, TGVizLayout(i), FNameTest, LNames[i], true); } else { TSnap::DrawGViz(UNGraph1, TGVizLayout(i), FNameTest, LNames[i], true); } // Check if file exists EXPECT_TRUE(fileExists(FNameTest.CStr())); #ifdef __linux // Compare directly for ps files, (can't compare png and gif due to EXIF-labels) if (Exts[e] == "ps") { EXPECT_TRUE(compareFiles(FNameBase.CStr(), FNameTest.CStr())); } #endif } } } }
void TFFile::GetFNmV( const TStr& FPath, const TStrV& FExtV, const bool& RecurseP, TStrV& FNmV){ // prepare file-directory traversal TStrV FPathV; FPathV.Add(FPath); TFFile FFile(FPathV, FExtV, "", RecurseP); TStr FNm; // traverse directory FNmV.Clr(); while (FFile.Next(FNm)){ FNmV.Add(FNm); } }
// Parse one Reuters-2000 news item from the XML file FNm into its parts:
// document id and date (attributes of <newsitem>), title, headline, optional
// byline and dateline, the text paragraphs, and the topic / country /
// industry code lists taken from the <metadata><codes> groups.
void TCpDoc::LoadReuters2000DocFromXml(const TStr& FNm,
 TStr& DocId, TStr& DateStr, TStr& TitleStr, TStr& HeadlineStr,
 TStr& BylineStr, TStr& DatelineStr, TStrV& ParStrV,
 TStrV& TopCdNmV, TStrV& GeoCdNmV, TStrV& IndCdNmV){
  PXmlDoc Doc=TXmlDoc::LoadTxt(FNm);
  // get text strings
  // general document data from the <newsitem> tag and its children
  DocId=Doc->GetTagTok("newsitem")->GetArgVal("itemid");
  DateStr=Doc->GetTagTok("newsitem")->GetArgVal("date");
  TitleStr=Doc->GetTagTok("newsitem|title")->GetTokStr(false);
  HeadlineStr=Doc->GetTagTok("newsitem|headline")->GetTokStr(false);
  // byline and dateline are optional; default to the empty string
  BylineStr=""; PXmlTok BylineTok;
  if (Doc->IsTagTok("newsitem|byline", BylineTok)){
    BylineStr=BylineTok->GetTokStr(false);}
  DatelineStr=""; PXmlTok DatelineTok;
  if (Doc->IsTagTok("newsitem|dateline", DatelineTok)){
    DatelineStr=DatelineTok->GetTokStr(false);}
  // text paragraphs: one string per <p> element under <text>
  ParStrV.Clr();
  TXmlTokV ParTokV; Doc->GetTagTokV("newsitem|text|p", ParTokV);
  for (int ParTokN=0; ParTokN<ParTokV.Len(); ParTokN++){
    TStr ParStr=ParTokV[ParTokN]->GetTokStr(false);
    ParStrV.Add(ParStr);
  }
  // codes: dispatch on the "class" attribute of each <codes> group;
  // an unrecognized class is a hard failure (Fail)
  TopCdNmV.Clr(); GeoCdNmV.Clr(); IndCdNmV.Clr();
  TXmlTokV CdsTokV; Doc->GetTagTokV("newsitem|metadata|codes", CdsTokV);
  for (int CdsTokN=0; CdsTokN<CdsTokV.Len(); CdsTokN++){
    PXmlTok CdsTok=CdsTokV[CdsTokN];
    TXmlTokV CdTokV; CdsTok->GetTagTokV("code", CdTokV);
    if (CdsTok->GetArgVal("class")=="bip:topics:1.0"){
      // topic codes
      for (int CdTokN=0; CdTokN<CdTokV.Len(); CdTokN++){
        TStr CdNm=CdTokV[CdTokN]->GetArgVal("code");
        TopCdNmV.Add(CdNm);
      }
    } else if (CdsTok->GetArgVal("class")=="bip:countries:1.0"){
      // geographic (country) codes
      for (int CdTokN=0; CdTokN<CdTokV.Len(); CdTokN++){
        TStr CdNm=CdTokV[CdTokN]->GetArgVal("code");
        GeoCdNmV.Add(CdNm);
      }
    } else if (CdsTok->GetArgVal("class")=="bip:industries:1.0"){
      // industry codes
      for (int CdTokN=0; CdTokN<CdTokV.Len(); CdTokN++){
        TStr CdNm=CdTokV[CdTokN]->GetArgVal("code");
        IndCdNmV.Add(CdNm);
      }
    } else {
      Fail;
    }
  }
}
void GetKeywords(const PXmlTok& QueryXml, const TStr& TagPath, TStrV& KeywordsV, TStrV& IgnoreKeywordsV) { TXmlTokV KwsXmlV; QueryXml->GetTagTokV(TagPath, KwsXmlV); for (int KwInd = 0; KwInd < KwsXmlV.Len(); KwInd++) { TStr Kw = KwsXmlV[KwInd]->GetTokStr(false); int hide = KwsXmlV[KwInd]->GetIntArgVal("hide", 0); if (hide) IgnoreKeywordsV.Add(Kw); else KeywordsV.Add(Kw); } }
// Older-API variant of the TTable smoke test: load animals, de-duplicate,
// select with a compound predicate tree, group, count, join, and save.
int main() {
  // create scheme
  TTable::Schema AnimalS;
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Animal", TTable::STR));
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Size", TTable::STR));
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Location", TTable::STR));
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Number", TTable::INT));
  // create table
  PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
  //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
  T->Unique("Animal");
  //TTable Ts = *T; not working because of problem with copy-c'tor
  PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "animals.txt");
  Ts->Unique("Animal");
  // test Select
  // create predicate tree: find all animals that are
  // (big and African) or (medium and Australian)
  TPredicate::TAtomicPredicate A1(TPredicate::STR, true, TPredicate::EQ, "Location", "", 0, 0, "Africa");
  TPredicate::TPredicateNode N1(A1); // Location == "Africa"
  TPredicate::TAtomicPredicate A2(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "big");
  TPredicate::TPredicateNode N2(A2); // Size == "big"
  TPredicate::TPredicateNode N3(TPredicate::AND); // N1 AND N2
  N3.AddLeftChild(&N1);
  N3.AddRightChild(&N2);
  TPredicate::TAtomicPredicate A4(TPredicate::STR, true, TPredicate::EQ, "Location", "", 0, 0, "Australia");
  TPredicate::TPredicateNode N4(A4); // Location == "Australia"
  TPredicate::TAtomicPredicate A5(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "medium");
  TPredicate::TPredicateNode N5(A5); // Size == "medium"
  TPredicate::TPredicateNode N6(TPredicate::AND); // N4 AND N5
  N6.AddLeftChild(&N4);
  N6.AddRightChild(&N5);
  TPredicate::TPredicateNode N7(TPredicate::OR); // N3 OR N6 - the tree root
  N7.AddLeftChild(&N3);
  N7.AddRightChild(&N6);
  TPredicate Pred(&N7);
  Ts->Select(Pred); // keep only rows matching the predicate
  // grouping and counting
  TStrV GroupBy;
  GroupBy.Add("Location");
  T->Group("LocationGroup", GroupBy);
  GroupBy.Add("Size");
  T->Group("LocationSizeGroup", GroupBy);
  T->Count("LocationCount", "Location");
  // join the two tables on the Location column
  PTable Tj = T->Join("Location", *Ts, "Location");
  //print table
  T->SaveSS("animals_out_T.txt");
  Ts->SaveSS("animals_out_Ts.txt");
  Tj->SaveSS("animals_out_Tj.txt");
  return 0;
}
// Build a bag-of-words over the headline strings of all documents in the
// base, one bow document per (id, document) pair, named by the document id.
// N-grams up to MxNGramLen with frequency >= MnNGramFq are used unless both
// parameters are 1. Uses the English-523 stop-word set and the Porter
// stemmer.
// FIX: the original called GetIdDocPrV twice (once inside the n-gram branch,
// once for the bow pass); the pair vector is now fetched once and reused.
PBowDocBs TSkyGridBs::GetBowDocBs(
 const int& MxNGramLen, const int& MnNGramFq) const {
  // prepare stop-words & stemmer
  PSwSet SwSet=TSwSet::GetSwSet(swstEn523);
  PStemmer Stemmer=TStemmer::GetStemmer(stmtPorter);
  // fetch the (id, document) pairs once; both passes below reuse them
  TSkyGridIdDocPrV IdDocPrV; GetIdDocPrV(IdDocPrV);
  // create the n-gram base only when n-grams are requested
  PNGramBs NGramBs;
  if (!((MxNGramLen==1)&&(MnNGramFq==1))){
    TStrV HtmlStrV;
    for (int DocN=0; DocN<IdDocPrV.Len(); DocN++){
      PSkyGridDoc Doc=IdDocPrV[DocN].Val2;
      HtmlStrV.Add(Doc->GetHeadlineStr());
    }
    NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
     HtmlStrV, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // create bow: add every headline as an html document
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  for (int DocN=0; DocN<IdDocPrV.Len(); DocN++){
    int DocId=IdDocPrV[DocN].Val1;
    PSkyGridDoc Doc=IdDocPrV[DocN].Val2;
    TStr DocStr=Doc->GetHeadlineStr();
    BowDocBs->AddHtmlDoc(TInt::GetStr(DocId), TStrV(), DocStr);
  }
  // return bow
  return BowDocBs;
}
// Convert a compound-document (.cpd) file InCpdFNm to line-document text
// format in OutLnDocFNm: one document per line, shaped as
//   <doc-name> !<cat-1> ... !<cat-n> <document text>
// Spaces in document names become underscores; CR/LF characters in the text
// are mapped to spaces.
void TBowFl::SaveCpdToLnDocTxt(const TStr& InCpdFNm, const TStr& OutLnDocFNm){
  TFOut FOut(OutLnDocFNm);
  FILE* fOut=FOut.GetFileId();
  PSIn CpDocSIn=TCpDoc::FFirstCpd(InCpdFNm);
  PCpDoc CpDoc;
  int Docs=0;
  printf("Saving '%s' to '%s' ...\n", InCpdFNm.CStr(), OutLnDocFNm.CStr());
  while (TCpDoc::FNextCpd(CpDocSIn, CpDoc)){
    Docs++;
    if (Docs%100==0){printf("%d Docs\r", Docs);} // progress report
    // get document-name; sanitize it for the line format
    TStr DocNm=CpDoc->GetDocNm();
    DocNm=TStr::GetFNmStr(DocNm);
    DocNm.ChangeChAll(' ', '_');
    // get document-categories
    TStrV CatNmV;
    for (int CatN=0; CatN<CpDoc->GetCats(); CatN++){
      CatNmV.Add(CpDoc->GetCatNm(CatN));}
    // get document-contents; map CR/LF to spaces
    TChA DocChA=CpDoc->GetTxtStr();
    DocChA.ChangeCh('\r', ' ');
    DocChA.ChangeCh('\n', ' ');
    // save document: name, !-prefixed categories, then the text
    fprintf(fOut, "%s", DocNm.CStr());
    for (int CatN=0; CatN<CatNmV.Len(); CatN++){
      fprintf(fOut, " !%s", CatNmV[CatN].CStr());}
    fprintf(fOut, " %s\n", DocChA.CStr());
  }
  printf("%d Docs\nDone.\n", Docs);
}
// Build a bag-of-words from the named-object occurrences of every document.
// A named object contributes one word per occurrence (TermFq copies);
// objects appearing in fewer than MnNmObjFq documents are skipped
// (MnNmObjFq==-1 disables the filter). Documents left with no words are
// omitted from the bow.
PBowDocBs TNmObjBs::GetBowDocBs(const int& MnNmObjFq) const {
  printf("Generating Bag-Of-Words...\n");
  PBowDocBs BowDocBs=TBowDocBs::New();
  // traverse documents
  for (int DocId=0; DocId<GetDocs(); DocId++){
    if (DocId%100==0){printf("%d\r", DocId);} // progress report
    TStr DocNm=GetDocNm(DocId);
    TStr DateStr=GetDocDateStr(DocId);
    // expand each named object of this document into TermFq word copies
    TStrV WordStrV;
    const int DocNmObjs=GetDocNmObjs(DocId);
    for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){
      int NmObjId; int TermFq;
      GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq);
      // keep only named objects that are frequent enough across documents
      if ((MnNmObjFq==-1)||(GetNmObjDocs(NmObjId)>=MnNmObjFq)){
        TStr NmObjStr=GetNmObjStr(NmObjId);
        for (int OccN=0; OccN<TermFq; OccN++){
          WordStrV.Add(NmObjStr);
        }
      }
    }
    // add the document only when it still has words after filtering
    if (!WordStrV.Empty()){
      const int DId=BowDocBs->AddDoc(DocNm, TStrV(), WordStrV);
      BowDocBs->PutDateStr(DId, DateStr);
    }
  }
  // return bag-of-words
  BowDocBs->AssertOk();
  printf("\nDone.\n");
  return BowDocBs;
}
// Build a bag-of-words over all EU project descriptions (title + description
// html). The first pass collects the corpus and extracts n-grams (length<=3,
// frequency>=3, English-523 stop-words); the n-gram base is also dumped to
// "NGram.Txt". The second pass adds each project as a document named by its
// acronym.
// FIX: removed the unused local EuProjNm in the first pass (the acronym was
// fetched but never used there).
PBowDocBs TCordisEuProjBs::GetBowDocBsFromEuProjDesc() const {
  printf("Generating Bag-Of-Words...\n");
  // first pass: one html string per project, for n-gram extraction
  TStrV HtmlStrV;
  int EuProjs=GetEuProjs();
  for (int EuProjN=0; EuProjN<EuProjs; EuProjN++){
    PCordisEuProj EuProj=GetEuProj(EuProjN);
    TStr EuProjHtmlStr=EuProj->GetTitleStr()+" "+EuProj->GetEuProjDescHtmlStr();
    HtmlStrV.Add(EuProjHtmlStr);
  }
  // create ngrams and dump them for inspection
  PSwSet SwSet=TSwSet::GetSwSet(swstEnglish523);
  PNGramBs NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(HtmlStrV, 3, 3, SwSet);
  NGramBs->SaveTxt("NGram.Txt");
  // second pass: create bag-of-words, one document per project
  printf("\n");
  PBowDocBs BowDocBs=TBowDocBs::New();
  BowDocBs->PutNGramBs(NGramBs);
  {for (int EuProjN=0; EuProjN<EuProjs; EuProjN++){
    if (EuProjN%100==0){printf("%d/%d\r", EuProjN, EuProjs);} // progress
    PCordisEuProj EuProj=GetEuProj(EuProjN);
    TStr DocNm=EuProj->GetEuProjAcrStr();
    TStr HtmlStr=EuProj->GetTitleStr()+" "+EuProj->GetEuProjDescHtmlStr();
    BowDocBs->AddHtmlDoc(DocNm, TStrV(), HtmlStr);
  }}
  BowDocBs->AssertOk();
  // return bag-of-words
  printf("\nDone.\n");
  return BowDocBs;
}
// Write per-cluster JSON files and the daily cluster-table JSON for the
// given cluster ids at PresentTime. Rank-change strings are derived by
// comparing each cluster's current position against OldTopClusters (the
// previous ranking, position i meaning rank i+1). No-op unless logging is
// enabled.
void LogOutput::PrintClusterInformation(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB,
 PNGraph& QGraph, TIntV& ClusterIds, TSecTm PresentTime, TIntV &OldTopClusters) {
  if (!ShouldLog) return;
  TStr CurDateString = PresentTime.GetDtYmdStr();
  Err("Writing cluster information...\n");
  // PREVIOUS RANKING SETUP: map cluster id -> previous 1-based rank
  THash<TInt, TInt> OldRankings;
  if (OldTopClusters.Len() > 0) {
    for (int i = 0; i < OldTopClusters.Len(); i++) {
      OldRankings.AddDat(OldTopClusters[i], i + 1);
    }
  }
  TStrV RankStr;
  TStr ClusterJSONDirectory = Directory + "/web/json/clusters/";
  for (int i = 0; i < ClusterIds.Len(); i++) {
    // rank-change indicator for this cluster at current rank i+1
    TStr OldRankStr;
    ComputeOldRankString(OldRankings, ClusterIds[i], i+1, OldRankStr);
    RankStr.Add(OldRankStr);
    // JSON file for each cluster!
    TPrintJson::PrintClusterJSON(QB, DB, CB, QGraph, ClusterJSONDirectory, ClusterIds[i], PresentTime);
  }
  Err("JSON Files for individual written!\n");
  // daily summary table, one file per date
  TStr JSONTableFileName = Directory + "/web/json/daily/" + CurDateString + ".json";
  TPrintJson::PrintClusterTableJSON(QB, DB, CB, JSONTableFileName, ClusterIds, RankStr);
  Err("JSON Files for the cluster table written!\n");
}
// Parse a free-form author list (names separated by the word "and", or by
// '&', ',' or ';') into standardized names. Input is lower-cased and
// newlines become spaces; each extracted fragment is normalized via
// GetStdName and empty results are dropped.
// FIX: the original's "split on 'and'" loop also split on ',' and a later
// pass split on ',' again; since splitting on a single character is
// idempotent and splits on distinct characters commute, the redundant pass
// is removed and the three character splits are driven by one loop.
void TStrUtil::GetStdNameV(TStr AuthorNames, TStrV& StdNameV) {
  AuthorNames.ChangeChAll('\n', ' ');
  AuthorNames.ToLc();
  // split on the word "and" first
  TStrV AuthV, TmpV, Tmp2V;
  AuthorNames.SplitOnStr(" and ", AuthV);
  // then split every fragment on each single-character separator in turn
  const char SepChV[3] = { '&', ',', ';' };
  for (int SepN = 0; SepN < 3; SepN++) {
    TmpV = AuthV; AuthV.Clr();
    for (int i = 0; i < TmpV.Len(); i++) {
      TmpV[i].SplitOnAllCh(SepChV[SepN], Tmp2V);
      AuthV.AddV(Tmp2V);
    }
  }
  // standardize names, dropping fragments that do not normalize
  StdNameV.Clr();
  //printf("\n*** %s\n", AuthorNames.CStr());
  for (int i = 0; i < AuthV.Len(); i++) {
    TStr StdName = GetStdName(AuthV[i]);
    if (! StdName.Empty()) {
      //printf("\t%s ==> %s\n", AuthV[i].CStr(), StdName.CStr());
      StdNameV.Add(StdName);
    }
  }
}
// Fill VarNmValV with every "name=value" string of the process environment,
// in the order they appear in the NULL-terminated _environ array.
void TEnv::GetVarNmValV(TStrV& VarNmValV) {
  VarNmValV.Clr();
  for (int VarN=0; _environ[VarN]!=NULL; VarN++) {
    VarNmValV.Add(_environ[VarN]);
  }
}
// Append the words of this unicode string to WordStrV (the vector is NOT
// cleared first). Word boundaries come from GetWordBoundPV, where
// WordBoundPV[UniChN+1] marks a boundary after character UniChV[UniChN].
// A boundary character itself is kept only when a word is already being
// accumulated, or when it is a single alphabetic character.
// FIX: removed the unused local `TUStrV WordUStrV` (declared but never used;
// its clear was already commented out).
void TUStr::GetWordStrV(TStrV& WordStrV){
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds, flushing at each word boundary
  int UniChs=Len();
  TIntV WordUniChV; // characters of the word currently being accumulated
  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // keep the boundary character if a word is in progress, or if it is
        // a single alphabetic character (so one-letter words are not lost)
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
      }
      if (!WordUniChV.Empty()){
        // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordStrV.Add(WordUStr.GetStr()); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector, keep capacity
      }
    } else {
      // no boundary: keep accumulating characters
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}
int main(int argc, char* argv[]){ //test1(); TTableContext Context; // create scheme Schema PostS; PostS.Add(TPair<TStr,TAttrType>("Id", atInt)); PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt)); PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt)); PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr)); PostS.Add(TPair<TStr,TAttrType>("Score", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4); PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols); printf("Load done\n"); TStrV cols; cols.Add("OwnerUserId"); struct timeval begin, end; gettimeofday(&begin, NULL); P->Aggregate(cols, aaSum, "Score", "Sum"); gettimeofday(&end, NULL); double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec); printf("Elapsed time:%.3lfs\n", diff / 1000000); if (atoi(argv[1]) == 0) return 0; P->SaveSS("tests/p3.txt"); return 0; }
// Collect into FldValV every value stored under field name FldNm in the
// (name, value) pair list FldNmValPrV, preserving their order.
void TSAppSrvFun::GetFldValV(const TStrKdV& FldNmValPrV, const TStr& FldNm, TStrV& FldValV) {
  FldValV.Clr();
  // scan forward for successive entries matching FldNm
  for (int ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, "")); ValN != -1;
       ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, ""), ValN + 1)) {
    FldValV.Add(FldNmValPrV[ValN].Dat);
  }
}
// Append the elements of this JSON array, which must all be strings, to
// StrV. Asserts that the value is an array and every element is a string.
void TJsonVal::GetArrStrV(TStrV& StrV) const {
  EAssert(IsArr());
  const int Vals = GetArrVals();
  for (int ValN = 0; ValN < Vals; ValN++) {
    PJsonVal ArrVal = GetArrVal(ValN);
    EAssert(ArrVal->IsStr()); // only string elements are allowed
    StrV.Add(ArrVal->GetStr());
  }
}
// Advance the traversal to the next matching file and put its name in FNm.
// Returns true when a file was found; on exhaustion returns false with
// CurFNm=="" and CurFNmN==-1. A file matches when its extension is in FExtV
// (or FExtV is empty) and its base name matches FBaseWc (or FBaseWc is
// empty); the comparison is upper-cased when CsImpP is false. When RecurseP
// is set, subdirectories are descended into via a nested TFFile (SubFFile).
bool TFFile::Next(TStr& FNm){
  // if need to recurse: drain the active sub-traversal first
  if (!SubFFile.Empty()){
    if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
    else {SubFFile=NULL;}
  }
  // for all required file-paths
  while (FPathN<FPathV.Len()){
    // try to find anything within FPathV[FPathN] directory
    while (true) {
      // if directory not open -> open next first
      if (!FFileDesc->FDesc) {
        if ((++FPathN)<FPathV.Len()) {
          FFileDesc->FDesc = opendir(FPathV[FPathN].CStr());
        } else break;
        if (!FFileDesc->FDesc) break; // failed to open this one; pass control to outer loop
      }
      FFileDesc->DirEnt = readdir(FFileDesc->FDesc);
      if (FFileDesc->DirEnt) {
        // found a directory entry; classify it via stat
        TStr FBase = FFileDesc->GetFBase();
        FNm = FPathV[FPathN]+FBase;
        struct stat Stat;
        int ErrCd = stat(FNm.CStr(), &Stat);
        Assert(ErrCd==0); // !bn: assert-with-exception [same elsewhere in this function]
        if (S_ISREG(Stat.st_mode)) {
          // regular file: apply extension and wildcard filters
          if ((FBase!=".")&&(FBase!="..")){
            TStr FExt=FNm.GetFExt();
            if (!CsImpP){FExt.ToUc(); FBase.ToUc();} // case-insensitive match
            if (((FExtV.Empty())||(FExtV.SearchForw(FExt)!=-1))&&
             ((FBaseWc.Empty())||(FBase.IsWcMatch(FBaseWc)))){
              CurFNm=FNm; CurFNmN++; return true;}
          }
        } else if (S_ISDIR(Stat.st_mode) && RecurseP) {
          // subdirectory: recurse through a nested TFFile
          if ((FBase!=".")&&(FBase!="..")){
            TStr SubFPath=FPathV[FPathN]+FBase;
            TStrV SubFPathV; SubFPathV.Add(SubFPath);
            SubFFile=New(SubFPathV, FExtV, FBaseWc, RecurseP);
            if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
            else {SubFFile=NULL;}
          }
        }
      } else {
        // end of directory; clean up (ignore DirEnt, it's allocated within FDesc), pass control to outer loop
        FFileDesc->DirEnt = NULL;
        int ErrCd = closedir(FFileDesc->FDesc);
        FFileDesc->FDesc = NULL;
        Assert(ErrCd==0);
        break;
      }
    }
  }
  // not found
  CurFNm=""; CurFNmN=-1;
  return false;
}
// Collect into Names the names of edge EId's attributes that are not marked
// deleted, iterating from EdgeHI to the end of the attribute table.
void TNEANet::AttrNameEI(const TInt& EId, TStrIntPrH::TIter EdgeHI, TStrV& Names) const {
  Names = TVec<TStr>(); // start from an empty name list
  for (; !EdgeHI.IsEnd(); EdgeHI++) {
    if (!EdgeAttrIsDeleted(EId, EdgeHI)) {
      Names.Add(EdgeHI.GetKey());
    }
  }
}
// copy files for a particular folder info void TBackupProfile::CopyFolder(const TStr& BaseTargetFolder, const TStr& SourceFolder, const TStrV& Extensions, const TStrV& SkipIfContainingV, const bool& IncludeSubfolders, const bool& ReportP, TStr& ErrMsg) { try { // get the name of the source folder TStrV PathV; TDir::SplitPath(SourceFolder, PathV); EAssert(PathV.Len() > 0); // create the folder in the base target folder TStr TargetFolder = BaseTargetFolder + PathV[PathV.Len() - 1] + "/"; if (!TDir::Exists(TargetFolder)) TDir::GenDir(TargetFolder); // find files to be copied TStrV FileV; TFFile::GetFNmV(SourceFolder, Extensions, false, FileV); TStrV FolderV; // copy them for (int N = 0; N < FileV.Len(); N++) { // we found a file if (TFile::Exists(FileV[N])) { const TStr FileName = TDir::GetFileName(FileV[N]); // is this a file that we wish to ignore? bool ShouldCopy = true; for (int S = 0; S < SkipIfContainingV.Len(); S++) { if (FileName.SearchStr(SkipIfContainingV[S]) >= 0) ShouldCopy = false; } if (!ShouldCopy) continue; const TStr TargetFNm = TargetFolder + FileName; if (ReportP) TNotify::StdNotify->OnStatusFmt("Copying file: %s\r", FileName.CStr()); TFile::Copy(FileV[N], TargetFNm); } // we found a folder else { FolderV.Add(FileV[N]); } } if (IncludeSubfolders) { for (int N = 0; N < FolderV.Len(); N++) CopyFolder(TargetFolder, FolderV[N], Extensions, SkipIfContainingV, IncludeSubfolders, ReportP, ErrMsg); } } catch (PExcept E) { if (ErrMsg != "") ErrMsg += "\n"; ErrMsg += "Exception while copying from " + SourceFolder + ": " + E->GetMsgStr(); } catch (...) { if (ErrMsg != "") ErrMsg += "\n"; ErrMsg += "Exception while copying from " + SourceFolder + ": " + "Unrecognized exception occured."; } }
// Collect into Values the values of node NId's attributes that are not
// marked deleted, iterating from NodeHI to the end of the attribute table.
void TNEANet::AttrValueNI(const TInt& NId , TStrIntPrH::TIter NodeHI, TStrV& Values) const {
  Values = TVec<TStr>(); // start from an empty value list
  for (; !NodeHI.IsEnd(); NodeHI++) {
    if (!NodeAttrIsDeleted(NId, NodeHI)) {
      Values.Add(GetNodeAttrValue(NId, NodeHI));
    }
  }
}
// Collect into Names the names of edge EId's float-typed attributes that are
// not marked deleted, iterating from EdgeHI to the end of the table.
void TNEANet::FltAttrNameEI(const TInt& EId, TStrIntPrH::TIter EdgeHI, TStrV& Names) const {
  Names = TVec<TStr>(); // start from an empty name list
  for (; !EdgeHI.IsEnd(); EdgeHI++) {
    // keep attributes whose declared type is float and that are still live
    if (EdgeHI.GetDat().Val1 == FltType) {
      if (!EdgeAttrIsFltDeleted(EId, EdgeHI)) {
        Names.Add(EdgeHI.GetKey());
      }
    }
  }
}
// Collect into Names the names of node NId's attributes that are not marked
// deleted, iterating from NodeHI to the end of the attribute table.
void TNEANet::AttrNameNI(const TInt& NId, TStrIntPrH::TIter NodeHI, TStrV& Names) const {
  Names = TVec<TStr>(); // start from an empty name list
  for (; !NodeHI.IsEnd(); NodeHI++) {
    if (!NodeAttrIsDeleted(NId, NodeHI)) {
      Names.Add(NodeHI.GetKey());
    }
  }
}
void TNEANet::AttrValueEI(const TInt& EId, TStrIntPrH::TIter EdgeHI, TStrV& Values) const { Values = TVec<TStr>(); while (!EdgeHI.IsEnd()) { if (!EdgeAttrIsDeleted(EId, EdgeHI)) { Values.Add(GetNodeAttrValue(EId, EdgeHI)); } EdgeHI++; } }
// Collect into Names the names of node NId's float-typed attributes that are
// not marked deleted, iterating from NodeHI to the end of the table.
void TNEANet::FltAttrNameNI(const TInt& NId, TStrIntPrH::TIter NodeHI, TStrV& Names) const {
  Names = TVec<TStr>(); // start from an empty name list
  for (; !NodeHI.IsEnd(); NodeHI++) {
    // keep attributes whose declared type is float and that are still live
    if (NodeHI.GetDat().Val1 == FltType) {
      if (!NodeAttrIsFltDeleted(NId, NodeHI)) {
        Names.Add(NodeHI.GetKey());
      }
    }
  }
}
// Return the values of all command-line arguments that start with PrefixStr,
// with the prefix stripped. When too few arguments were given (<=MnArgs), a
// usage line showing DfValV as defaults is printed (unless SilentP) and an
// empty vector is returned. Otherwise, DfValV is used as a fallback when no
// argument matches, and the chosen values are reported via Notify (unless
// SilentP).
TStrV TEnv::GetIfArgPrefixStrV(
 const TStr& PrefixStr, TStrV& DfValV, const TStr& DNm) const {
  TStrV ArgValV;
  if (Env.GetArgs()<=MnArgs) {
    // 'usage' argument message
    if (!SilentP) {
      printf(" %s%s (default:", PrefixStr.CStr(), DNm.CStr());
      for (int DfValN=0; DfValN<DfValV.Len(); DfValN++) {
        if (DfValN>0) { printf(", "); }
        printf("'%s'", DfValV[DfValN].CStr());
      }
      printf(")\n");
    }
    return ArgValV;
  } else {
    // argument & value message
    // NOTE(review): ArgValVChA is built below but never read afterwards --
    // candidate for removal.
    TStr ArgValVChA;
    for (int ArgN=0; ArgN<GetArgs(); ArgN++) {
      // get argument string
      TStr ArgStr=GetArg(ArgN);
      if (ArgStr.StartsWith(PrefixStr)) {
        // extract & add argument value
        ArgStr.DelStr(PrefixStr);
        ArgValV.Add(ArgStr);
        // add to message string
        if (ArgValV.Len()>1) { ArgValVChA+=", "; }
        ArgValVChA+=ArgValV.Last();
      }
    }
    // fall back to the supplied defaults when nothing matched
    if (ArgValV.Empty()) { ArgValV=DfValV; }
    // output argument values
    TChA MsgChA;
    MsgChA+=" "+DNm;
    MsgChA+=" (";
    MsgChA+=PrefixStr;
    MsgChA+=")=";
    for (int ArgValN=0; ArgValN<ArgValV.Len(); ArgValN++) {
      if (ArgValN>0) { MsgChA+=", "; }
      MsgChA+="'";
      MsgChA+=ArgValV[ArgValN];
      MsgChA+="'";
    }
    if (!SilentP) { TNotify::OnStatus(Notify, MsgChA); }
    return ArgValV;
  }
}