Example #1
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  // create table
  PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);
  //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
  T->Unique("Animal");
  TTable Ts = *T;  // did we fix problem with copy-c'tor ?
  //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols);
  //Ts->Unique(AnimalUnique);

  // test Select
  // create predicate tree: find all animals that are big and african or medium and Australian
  TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa");  
  TPredicate::TPredicateNode N1(A1);  // Location == "Africa"
  TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big");  
  TPredicate::TPredicateNode N2(A2);  // Size == "big"
  TPredicate::TPredicateNode N3(AND);
  N3.AddLeftChild(&N1);
  N3.AddRightChild(&N2);
  TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia");  
  TPredicate::TPredicateNode N4(A4);  
  TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium");  
  TPredicate::TPredicateNode N5(A5); 
  TPredicate::TPredicateNode N6(AND);
  N6.AddLeftChild(&N4);
  N6.AddRightChild(&N5);
  TPredicate::TPredicateNode N7(OR);
  N7.AddLeftChild(&N3);
  N7.AddRightChild(&N6);
  TPredicate Pred(&N7);
  TIntV SelectedRows;
  Ts.Select(Pred, SelectedRows);

  TStrV GroupBy;
  GroupBy.Add("Location");
  T->Group(GroupBy, "LocationGroup");
  GroupBy.Add("Size");
  T->Group(GroupBy, "LocationSizeGroup");
  T->Count("LocationCount", "Location");
  PTable Tj = T->Join("Location", Ts, "Location");
  TStrV UniqueAnimals;
  UniqueAnimals.Add("Animals_1.Animal");
  UniqueAnimals.Add("Animals_2.Animal");
  Tj->Unique(UniqueAnimals, false);
  //print table
   T->SaveSS("tests/animals_out_T.txt");
   Ts.SaveSS("tests/animals_out_Ts.txt");
   Tj->SaveSS("tests/animals_out_Tj.txt");
  return 0;
}
Example #2
0
File: bowfl.cpp Project: Accio/snap
// Builds a bag-of-words document base from the CIA World Factbook data
// loaded from FPath.
//  FPath          path handed to TCiaWFBBs::LoadHtml
//  MxDocs         documents are added until the country index equals MxDocs
//  SwSetTypeNm    stop-word set type name
//  StemmerTypeNm  stemmer type name
//  MxNGramLen, MnNGramFq  n-gram settings; the n-gram base is left unset
//                 when both are 1
// Returns the document base after AssertOk().
PBowDocBs TBowFl::LoadCiaWFBTxt(
 const TStr& FPath, const int& MxDocs,
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq){
  PCiaWFBBs FactbookBs=TCiaWFBBs::LoadHtml(FPath);
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // n-grams are only built when the settings differ from (1, 1)
  PNGramBs NGramBs;
  if ((MxNGramLen!=1)||(MnNGramFq!=1)){
    TStrV DescStrV;
    for (int CtryN=0; CtryN<FactbookBs->GetCountries(); CtryN++){
      DescStrV.Add(FactbookBs->GetCountry(CtryN)->GetDescStr());
    }
    NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
     DescStrV, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // one document per country, categorised by its "Map references" field
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  for (int CtryN=0;
   (CtryN<FactbookBs->GetCountries())&&(CtryN!=MxDocs); CtryN++){
    PCiaWFBCountry Country=FactbookBs->GetCountry(CtryN);
    TStr CountryNm=Country->GetCountryNm();
    TStr DescStr=Country->GetDescStr();
    TStrV CatNmV;
    CatNmV.Add(Country->GetFldVal("Map references").GetTrunc());
    BowDocBs->AddHtmlDoc(CountryNm, CatNmV, DescStr, true);
  }
  BowDocBs->AssertOk();
  return BowDocBs;
}
Example #3
0
///////////////////////////////
// Tokenizer-Utils
void TTokenizerUtil::Sentencize(const PSIn& SIn, TStrV& Sentences, const bool& SplitNewLineP) {
	TChA SentenceBuf;
	int c;
	while (!SIn->Eof()) {
		c = SIn->GetCh();
		switch (c) {
			case '\r':
			case '\n':	{
				if (!SplitNewLineP) {
					SentenceBuf += ' ';
					break;
				}
			}
			case '"' :
			case '.' :
			case '!' :
			case ':' :
			case ';' :
			case '?' :
			case '\t': {
				if (SentenceBuf.Len() > 2) {
					Sentences.Add(SentenceBuf);
					SentenceBuf.Clr();
				}
				break;
			}
			default: 
				SentenceBuf += c;
				break;
		}
	}
	if (SentenceBuf.Len() > 0) {
		Sentences.Add(SentenceBuf);
	}	
}
Example #4
0
// Directory is timestamped in the web directory by default.
// Chooses the output directory and copies the resource templates into it.
// Does nothing when logging is disabled.
//  Directory  directory to use; when empty, a new one named
//             WebDirectory + "<date>_<time>" is used instead.
// The directory is created with `mkdir -p` and resources/output/{text,web}
// are copied into it through the shell. Failed commands are reported via
// Err, and the final success message is only logged when all succeeded.
void LogOutput::SetupNewOutputDirectory(TStr Directory) {
  if (!ShouldLog) return;
  // CASE 1: We need to set up a new directory
  if (Directory == TStr("")) {
    TSecTm Tm = TSecTm::GetCurTm();
    TStr TimeStamp = Tm.GetDtYmdStr() + "_" + Tm.GetTmStr();
    this->Directory = WebDirectory + TimeStamp;
    Err("New directory set up: %s\n", this->Directory.CStr());
  } else {
    // CASE 2: The user has specified a directory - we just have to store it
    this->Directory = Directory;
    Err("Using existing directory: %s\n", this->Directory.CStr());
  }

  // Copy directories over.
  // NOTE(review): the directory name is spliced into shell commands
  // unquoted; names with spaces or shell metacharacters will misbehave.
  TStrV Commands;
  Commands.Add("mkdir -p " + this->Directory);
  Commands.Add("cp -r resources/output/text " + this->Directory);
  Commands.Add("cp -r resources/output/web " + this->Directory);

  bool AllSucceeded = true;
  for (int i = 0; i < Commands.Len(); i++) {
    const int Status = system(Commands[i].CStr());
    if (Status != 0) {
      // keep going (as before), but do not hide the failure
      AllSucceeded = false;
      Err("Command failed (status %d): %s\n", Status, Commands[i].CStr());
    }
  }
  if (AllSucceeded) {
    Err("Necessary files copied over to %s\n", this->Directory.CStr());
  }
}
Example #5
0
// Fills the two output vectors with the known stemmer types: the type name
// goes into StemmerTypeNmV and the matching descriptive name into
// StemmerTypeDNmV, at the same position. Both vectors are cleared first.
void TStemmer::GetStemmerTypeNmV(TStrV& StemmerTypeNmV, TStrV& StemmerTypeDNmV) {
    // {type name, descriptive name}
    static const char* const TypeNmTable[][2] = {
        {"none", "None"},
        {"porter", "English-Porter"}
    };
    const int Types = int(sizeof(TypeNmTable) / sizeof(TypeNmTable[0]));
    StemmerTypeNmV.Clr();
    StemmerTypeDNmV.Clr();
    for (int TypeN = 0; TypeN < Types; TypeN++) {
        StemmerTypeNmV.Add(TypeNmTable[TypeN][0]);
        StemmerTypeDNmV.Add(TypeNmTable[TypeN][1]);
    }
}
Example #6
0
// Test drawing of SNAP graphs using GraphViz with color labeling
TEST(GVizTest, DrawGViz) {
  // Input graphs: one undirected and one directed edge list under DIRNAME
  PUNGraph UNGraph1 = LoadEdgeList<PUNGraph>(TStr::Fmt("%s/sample_ungraph1.txt", DIRNAME));
  PNGraph NGraph1 = LoadEdgeList<PNGraph>(TStr::Fmt("%s/sample_ngraph1.txt", DIRNAME));

  mkdir(DIRNAME, S_IRWXU | S_IRWXG | S_IRWXO);

  // Layout names, indexed like TGVizLayout: gvlDot, gvlNeato, gvlTwopi, gvlCirco
  TStrV LNames;
  LNames.Add("Dot");
  LNames.Add("Neato");
  LNames.Add("Twopi");
  LNames.Add("Circo");

  // Output formats to draw
  TStrV Exts;
  Exts.Add("ps");
  Exts.Add("png");

  // Every combination of layout x format x {undirected, directed}
  for (int LayoutN = 0; LayoutN < LNames.Len(); LayoutN++) {
    for (int ExtN = 0; ExtN < Exts.Len(); ExtN++) {
      for (int DirN = 0; DirN < 2; DirN++) {
        const bool DirectedP = (DirN != 0);
        const char* GraphKind = DirectedP ? "ngraph" : "ungraph";

        // Baseline file is expected to exist already (used as benchmark)
        TStr FNameBase = TStr::Fmt("%s/base_%s_%s.%s", DIRNAME, GraphKind, LNames[LayoutN].CStr(), Exts[ExtN].CStr());
        TStr FNameTest = TStr::Fmt("%s/test_%s_%s.%s", DIRNAME, GraphKind, LNames[LayoutN].CStr(), Exts[ExtN].CStr());

        // Start from a clean slate: no stale output from an earlier run
        remove(FNameTest.CStr());
        EXPECT_FALSE(fileExists(FNameTest.CStr()));

        // Draw the graph; the layout name doubles as the plot description
        if (DirectedP) {
          TSnap::DrawGViz(NGraph1, TGVizLayout(LayoutN), FNameTest, LNames[LayoutN], true);
        } else {
          TSnap::DrawGViz(UNGraph1, TGVizLayout(LayoutN), FNameTest, LNames[LayoutN], true);
        }
        // The drawing must have produced the output file
        EXPECT_TRUE(fileExists(FNameTest.CStr()));

#ifdef __linux
        // Only ps output is compared with the baseline (png and gif can't be
        // compared due to EXIF-labels)
        if (Exts[ExtN] == "ps") {
          EXPECT_TRUE(compareFiles(FNameBase.CStr(), FNameTest.CStr()));
        }
#endif
      }
    }
  }

}
Example #7
0
void TFFile::GetFNmV(
 const TStr& FPath, const TStrV& FExtV, const bool& RecurseP, TStrV& FNmV){
  // prepare file-directory traversal
  TStrV FPathV; FPathV.Add(FPath);
  TFFile FFile(FPathV, FExtV, "", RecurseP); TStr FNm;
  // traverse directory
  FNmV.Clr();
  while (FFile.Next(FNm)){
    FNmV.Add(FNm);
  }
}
Example #8
0
void TCpDoc::LoadReuters2000DocFromXml(const TStr& FNm,
 TStr& DocId, TStr& DateStr, TStr& TitleStr,
 TStr& HeadlineStr, TStr& BylineStr, TStr& DatelineStr,
 TStrV& ParStrV,
 TStrV& TopCdNmV, TStrV& GeoCdNmV, TStrV& IndCdNmV){
  PXmlDoc Doc=TXmlDoc::LoadTxt(FNm);
  // get text strings
  // general document data
  DocId=Doc->GetTagTok("newsitem")->GetArgVal("itemid");
  DateStr=Doc->GetTagTok("newsitem")->GetArgVal("date");
  TitleStr=Doc->GetTagTok("newsitem|title")->GetTokStr(false);
  HeadlineStr=Doc->GetTagTok("newsitem|headline")->GetTokStr(false);
  BylineStr=""; PXmlTok BylineTok;
  if (Doc->IsTagTok("newsitem|byline", BylineTok)){
    BylineStr=BylineTok->GetTokStr(false);}
  DatelineStr=""; PXmlTok DatelineTok;
  if (Doc->IsTagTok("newsitem|dateline", DatelineTok)){
    DatelineStr=DatelineTok->GetTokStr(false);}
  // text paragraphs
  ParStrV.Clr(); TXmlTokV ParTokV; Doc->GetTagTokV("newsitem|text|p", ParTokV);
  for (int ParTokN=0; ParTokN<ParTokV.Len(); ParTokN++){
    TStr ParStr=ParTokV[ParTokN]->GetTokStr(false);
    ParStrV.Add(ParStr);
  }
  // codes
  TopCdNmV.Clr(); GeoCdNmV.Clr(); IndCdNmV.Clr();
  TXmlTokV CdsTokV; Doc->GetTagTokV("newsitem|metadata|codes", CdsTokV);
  for (int CdsTokN=0; CdsTokN<CdsTokV.Len(); CdsTokN++){
    PXmlTok CdsTok=CdsTokV[CdsTokN];
    TXmlTokV CdTokV; CdsTok->GetTagTokV("code", CdTokV);
    if (CdsTok->GetArgVal("class")=="bip:topics:1.0"){
      for (int CdTokN=0; CdTokN<CdTokV.Len(); CdTokN++){
        TStr CdNm=CdTokV[CdTokN]->GetArgVal("code");
        TopCdNmV.Add(CdNm);
      }
    } else
    if (CdsTok->GetArgVal("class")=="bip:countries:1.0"){
      for (int CdTokN=0; CdTokN<CdTokV.Len(); CdTokN++){
        TStr CdNm=CdTokV[CdTokN]->GetArgVal("code");
        GeoCdNmV.Add(CdNm);
      }
    } else
    if (CdsTok->GetArgVal("class")=="bip:industries:1.0"){
      for (int CdTokN=0; CdTokN<CdTokV.Len(); CdTokN++){
        TStr CdNm=CdTokV[CdTokN]->GetArgVal("code");
        IndCdNmV.Add(CdNm);
      }
    } else {
      Fail;
    }
  }
}
void GetKeywords(const PXmlTok& QueryXml, const TStr& TagPath, TStrV& KeywordsV, TStrV& IgnoreKeywordsV)
{
	TXmlTokV KwsXmlV;
	QueryXml->GetTagTokV(TagPath, KwsXmlV);
	for (int KwInd = 0; KwInd < KwsXmlV.Len(); KwInd++) 
	{
		TStr Kw = KwsXmlV[KwInd]->GetTokStr(false);
		int hide = KwsXmlV[KwInd]->GetIntArgVal("hide", 0);
		if (hide)
			IgnoreKeywordsV.Add(Kw);
		else
			KeywordsV.Add(Kw);
	}
}
Example #10
0
int main() {
    // create scheme
    TTable::Schema AnimalS;
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Animal", TTable::STR));
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Size", TTable::STR));
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Location", TTable::STR));
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Number", TTable::INT));
    // create table
    PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
    //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
    T->Unique("Animal");
    //TTable Ts = *T;  not working because of problem with copy-c'tor
    PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "animals.txt");
    Ts->Unique("Animal");

    // test Select
    // create predicate tree: find all animals that are big and african or medium and Australian
    TPredicate::TAtomicPredicate A1(TPredicate::STR, true, TPredicate::EQ, "Location", "", 0, 0, "Africa");
    TPredicate::TPredicateNode N1(A1);  // Location == "Africa"
    TPredicate::TAtomicPredicate A2(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "big");
    TPredicate::TPredicateNode N2(A2);  // Size == "big"
    TPredicate::TPredicateNode N3(TPredicate::AND);
    N3.AddLeftChild(&N1);
    N3.AddRightChild(&N2);
    TPredicate::TAtomicPredicate A4(TPredicate::STR, true, TPredicate::EQ, "Location", "", 0, 0, "Australia");
    TPredicate::TPredicateNode N4(A4);
    TPredicate::TAtomicPredicate A5(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "medium");
    TPredicate::TPredicateNode N5(A5);
    TPredicate::TPredicateNode N6(TPredicate::AND);
    N6.AddLeftChild(&N4);
    N6.AddRightChild(&N5);
    TPredicate::TPredicateNode N7(TPredicate::OR);
    N7.AddLeftChild(&N3);
    N7.AddRightChild(&N6);
    TPredicate Pred(&N7);
    Ts->Select(Pred);

    TStrV GroupBy;
    GroupBy.Add("Location");
    T->Group("LocationGroup", GroupBy);
    GroupBy.Add("Size");
    T->Group("LocationSizeGroup", GroupBy);
    T->Count("LocationCount", "Location");
    PTable Tj = T->Join("Location", *Ts, "Location");
    //print table
    T->SaveSS("animals_out_T.txt");
    Ts->SaveSS("animals_out_Ts.txt");
    Tj->SaveSS("animals_out_Tj.txt");
    return 0;
}
Example #11
0
// Builds a bag-of-words document base from the headline of every document
// returned by GetIdDocPrV.
//  MxNGramLen, MnNGramFq  n-gram settings; the n-gram base is left unset
//                         when both are 1
// Documents are named by their id converted to a string and carry no
// categories. The (id, document) list is fetched once and shared by the
// n-gram pass and the document pass.
PBowDocBs TSkyGridBs::GetBowDocBs(
 const int& MxNGramLen, const int& MnNGramFq) const {
  // prepare stop-words
  PSwSet SwSet=TSwSet::GetSwSet(swstEn523);
  // prepare stemmer
  PStemmer Stemmer=TStemmer::GetStemmer(stmtPorter);
  // fetch the (id, document) pairs once for both passes below
  TSkyGridIdDocPrV IdDocPrV; GetIdDocPrV(IdDocPrV);
  // create ngrams
  PNGramBs NGramBs;
  if (!((MxNGramLen==1)&&(MnNGramFq==1))){
    TStrV HtmlStrV;
    for (int DocN=0; DocN<IdDocPrV.Len(); DocN++){
      PSkyGridDoc Doc=IdDocPrV[DocN].Val2;
      HtmlStrV.Add(Doc->GetHeadlineStr());
    }
    NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(
     HtmlStrV, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // create bow
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  for (int DocN=0; DocN<IdDocPrV.Len(); DocN++){
    int DocId=IdDocPrV[DocN].Val1;
    PSkyGridDoc Doc=IdDocPrV[DocN].Val2;
    TStr DocStr=Doc->GetHeadlineStr();
    BowDocBs->AddHtmlDoc(TInt::GetStr(DocId), TStrV(), DocStr);
  }
  // return bow
  return BowDocBs;
}
Example #12
0
File: bowfl.cpp Project: Accio/snap
// Converts the compact-document file InCpdFNm into a line-per-document text
// file OutLnDocFNm. Each output line is
//   <name> !<category> ... <text>
// where the name is passed through TStr::GetFNmStr with spaces turned into
// underscores, and '\r' / '\n' in the text are replaced by spaces.
// Progress is printed to stdout every 100 documents.
void TBowFl::SaveCpdToLnDocTxt(const TStr& InCpdFNm, const TStr& OutLnDocFNm){
  // open the output first, then the input stream
  TFOut FOut(OutLnDocFNm); FILE* fOut=FOut.GetFileId();
  PSIn CpDocSIn=TCpDoc::FFirstCpd(InCpdFNm);
  printf("Saving '%s' to '%s' ...\n", InCpdFNm.CStr(), OutLnDocFNm.CStr());
  int Docs=0;
  for (PCpDoc CpDoc; TCpDoc::FNextCpd(CpDocSIn, CpDoc); ){
    Docs++;
    if (Docs%100==0){printf("%d Docs\r", Docs);}
    // document name
    TStr DocNm=TStr::GetFNmStr(CpDoc->GetDocNm());
    DocNm.ChangeChAll(' ', '_');
    // document contents, flattened onto a single line
    TChA DocChA=CpDoc->GetTxtStr();
    DocChA.ChangeCh('\r', ' ');
    DocChA.ChangeCh('\n', ' ');
    // write: name, then each category prefixed with '!', then the text
    fprintf(fOut, "%s", DocNm.CStr());
    const int Cats=CpDoc->GetCats();
    for (int CatN=0; CatN<Cats; CatN++){
      fprintf(fOut, " !%s", CpDoc->GetCatNm(CatN).CStr());
    }
    fprintf(fOut, " %s\n", DocChA.CStr());
  }
  printf("%d Docs\nDone.\n", Docs);
}
// Builds a bag-of-words document base in which every document's "words" are
// its named objects, each repeated as many times as its term frequency.
//  MnNmObjFq  a named object is used only if GetNmObjDocs() for it is at
//             least this value; -1 disables the filter
// Documents that end up with no words are skipped. Each added document
// gets its date string attached. Progress is printed every 100 documents.
PBowDocBs TNmObjBs::GetBowDocBs(const int& MnNmObjFq) const {
  printf("Generating Bag-Of-Words...\n");
  PBowDocBs BowDocBs=TBowDocBs::New();
  const bool NoFqFilterP=(MnNmObjFq==-1);
  for (int DocId=0; DocId<GetDocs(); DocId++){
    if (DocId%100==0){printf("%d\r", DocId);}
    TStr DocNm=GetDocNm(DocId);
    TStr DateStr=GetDocDateStr(DocId);
    // expand the document's named objects into a word list
    TStrV WordStrV;
    const int NmObjs=GetDocNmObjs(DocId);
    for (int NmObjN=0; NmObjN<NmObjs; NmObjN++){
      int NmObjId; int TermFq;
      GetDocNmObjId(DocId, NmObjN, NmObjId, TermFq);
      if (!(NoFqFilterP||(GetNmObjDocs(NmObjId)>=MnNmObjFq))){continue;}
      TStr NmObjStr=GetNmObjStr(NmObjId);
      for (int OccN=0; OccN<TermFq; OccN++){
        WordStrV.Add(NmObjStr);
      }
    }
    // documents without any accepted named object are left out
    if (WordStrV.Empty()){continue;}
    int DId=BowDocBs->AddDoc(DocNm, TStrV(), WordStrV);
    BowDocBs->PutDateStr(DId, DateStr);
  }
  BowDocBs->AssertOk();
  printf("\nDone.\n");
  return BowDocBs;
}
// Builds a bag-of-words document base with one document per EU project.
// The document text is the project title, a space, and the project's HTML
// description; the document name is the project acronym. An n-gram base is
// built from all texts (called with 3, 3 and the swstEnglish523 stop-word
// set), saved to "NGram.Txt" and attached to the result. Progress is
// printed every 100 projects.
PBowDocBs TCordisEuProjBs::GetBowDocBsFromEuProjDesc() const {
  printf("Generating Bag-Of-Words...\n");
  // create document vector: one HTML string per project
  TStrV HtmlStrV;
  int EuProjs=GetEuProjs();
  for (int EuProjN=0; EuProjN<EuProjs; EuProjN++){
    PCordisEuProj EuProj=GetEuProj(EuProjN);
    TStr EuProjHtmlStr=EuProj->GetTitleStr()+" "+EuProj->GetEuProjDescHtmlStr();
    HtmlStrV.Add(EuProjHtmlStr);
  }
  // create ngrams
  PSwSet SwSet=TSwSet::GetSwSet(swstEnglish523);
  PNGramBs NGramBs=TNGramBs::GetNGramBsFromHtmlStrV(HtmlStrV, 3, 3, SwSet);
  NGramBs->SaveTxt("NGram.Txt");
  // create bag-of-words
  printf("\n");
  PBowDocBs BowDocBs=TBowDocBs::New();
  BowDocBs->PutNGramBs(NGramBs);
  {for (int EuProjN=0; EuProjN<EuProjs; EuProjN++){
    if (EuProjN%100==0){printf("%d/%d\r", EuProjN, EuProjs);}
    PCordisEuProj EuProj=GetEuProj(EuProjN);
    TStr DocNm=EuProj->GetEuProjAcrStr();
    // reuse the text assembled above instead of rebuilding it
    BowDocBs->AddHtmlDoc(DocNm, TStrV(), HtmlStrV[EuProjN]);
  }}
  BowDocBs->AssertOk();
  // return bag-of-words
  printf("\nDone.\n");
  return BowDocBs;
}
Example #15
0
// Writes the JSON output for the given clusters: one JSON file per cluster
// under <Directory>/web/json/clusters/ and one table file
// <Directory>/web/json/daily/<date>.json, where <date> comes from
// PresentTime. Does nothing when logging is disabled.
//  ClusterIds      clusters to print, in rank order (position i has rank i+1)
//  OldTopClusters  previous ranking; position i is recorded as old rank i+1
void LogOutput::PrintClusterInformation(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, PNGraph& QGraph, TIntV& ClusterIds, TSecTm PresentTime, TIntV &OldTopClusters) {
  if (!ShouldLog) return;
  TStr DateStr = PresentTime.GetDtYmdStr();
  Err("Writing cluster information...\n");

  // map each previously ranked cluster to its old 1-based rank
  THash<TInt, TInt> PrevRankH;
  for (int RankN = 0; RankN < OldTopClusters.Len(); RankN++) {
    PrevRankH.AddDat(OldTopClusters[RankN], RankN + 1);
  }

  // per cluster: compute its old-rank string and write its JSON file
  TStrV RankStrV;
  TStr ClusterJsonDir = Directory + "/web/json/clusters/";
  for (int ClusterN = 0; ClusterN < ClusterIds.Len(); ClusterN++) {
    TStr PrevRankStr;
    ComputeOldRankString(PrevRankH, ClusterIds[ClusterN], ClusterN + 1, PrevRankStr);
    RankStrV.Add(PrevRankStr);
    TPrintJson::PrintClusterJSON(QB, DB, CB, QGraph, ClusterJsonDir, ClusterIds[ClusterN], PresentTime);
  }
  Err("JSON Files for individual written!\n");

  // summary table for the day
  TStr TableFNm = Directory + "/web/json/daily/" + DateStr + ".json";
  TPrintJson::PrintClusterTableJSON(QB, DB, CB, TableFNm, ClusterIds, RankStrV);
  Err("JSON Files for the cluster table written!\n");
}
Example #16
0
// Splits a free-form author list into individual names and standardizes
// each one with GetStdName.
//  AuthorNames  author list (taken by value; newlines are replaced by
//               spaces and the text is lower-cased before splitting)
//  StdNameV     cleared, then filled with the non-empty standardized names
// The list is split on " and ", then on ',', '&', ',' and ';' in turn.
void TStrUtil::GetStdNameV(TStr AuthorNames, TStrV& StdNameV) {
  AuthorNames.ChangeChAll('\n', ' ');
  AuthorNames.ToLc();
  // NameV holds the result of the latest pass, PrevV the input to it
  TStrV NameV, PrevV, PartV;
  AuthorNames.SplitOnStr(" and ", PrevV);
  // separator characters, applied one pass after another
  const char SepChV[] = {',', '&', ',', ';'};
  const int Seps = int(sizeof(SepChV) / sizeof(SepChV[0]));
  for (int SepN = 0; SepN < Seps; SepN++) {
    if (SepN > 0) { PrevV = NameV;  NameV.Clr(); }
    for (int NameN = 0; NameN < PrevV.Len(); NameN++) {
      PrevV[NameN].SplitOnAllCh(SepChV[SepN], PartV);
      NameV.AddV(PartV);
    }
  }
  // standardize names, dropping the ones that come back empty
  StdNameV.Clr();
  for (int NameN = 0; NameN < NameV.Len(); NameN++) {
    TStr StdName = GetStdName(NameV[NameN]);
    if (StdName.Empty()) { continue; }
    StdNameV.Add(StdName);
  }
}
Example #17
0
// Replaces the contents of VarNmValV with every entry of the
// NULL-terminated _environ array, in array order.
void TEnv::GetVarNmValV(TStrV& VarNmValV) {
    VarNmValV.Clr();
    // the array has no length field; a NULL entry marks its end
    for (int VarN=0; _environ[VarN]!=NULL; VarN++) {
        VarNmValV.Add(_environ[VarN]);
    }
}
Example #18
0
// Splits this Unicode string into words and appends each word, converted
// with GetStr(), to WordStrV.
// NOTE: WordStrV is NOT cleared; words are appended to whatever it holds.
// Word boundaries come from GetWordBoundPV, which is asserted to yield one
// more flag than there are characters, with the last flag set.
void TUStr::GetWordStrV(TStrV& WordStrV){
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  // the loop runs one step past the last character so that a pending word
  // is flushed at the end; the first test short-circuits there, so
  // WordBoundPV[UniChs+1] is never read
  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
      }
      if (!WordUniChV.Empty()){ // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordStrV.Add(WordUStr.GetStr()); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}
Example #19
0
// Benchmark: loads the StackOverflow posts table, times a sum-aggregation
// of "Score" grouped by "OwnerUserId", and prints the elapsed time.
// The table is saved to tests/p3.txt only when the first command-line
// argument parses to a non-zero integer; with no argument nothing is saved.
int main(int argc, char* argv[]){
  //test1();
  TTableContext Context;

  // create scheme
  Schema PostS;
  PostS.Add(TPair<TStr,TAttrType>("Id", atInt));
  PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr));
  PostS.Add(TPair<TStr,TAttrType>("Score", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4);

  PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("OwnerUserId");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);
  P->Aggregate(cols, aaSum, "Score", "Sum");
  gettimeofday(&end, NULL);

  // subtract seconds and microseconds separately and scale in floating
  // point, so the product cannot overflow where time_t is 32 bits wide
  double diff = (end.tv_sec - begin.tv_sec) * 1000000.0 + (end.tv_usec - begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  // argv[1] is optional: without it (or when it is 0) skip saving
  if (argc < 2 || atoi(argv[1]) == 0) return 0;

  P->SaveSS("tests/p3.txt");

  return 0;
}
Example #20
0
void TSAppSrvFun::GetFldValV(const TStrKdV& FldNmValPrV, const TStr& FldNm, TStrV& FldValV) {
	FldValV.Clr();
	int ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, ""));
	while (ValN != -1) {
		FldValV.Add(FldNmValPrV[ValN].Dat);
		ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, ""), ValN + 1);
	}
}
Example #21
0
// Appends the string value of every array element to StrV (StrV is not
// cleared). Asserts, via EAssert, that this value is an array and that
// every element is a string.
void TJsonVal::GetArrStrV(TStrV& StrV) const {
    EAssert(IsArr());
    const int ArrVals = GetArrVals();
    for (int ValN = 0; ValN < ArrVals; ValN++) {
        PJsonVal ArrVal = GetArrVal(ValN);
        EAssert(ArrVal->IsStr());
        StrV.Add(ArrVal->GetStr());
    }
}
Example #22
0
// Advances the traversal to the next matching file.
// On success stores the file name in FNm and CurFNm, increments CurFNmN and
// returns true. When nothing is left, sets CurFNm to "" and CurFNmN to -1
// and returns false (FNm may then hold the last name that was examined).
// A regular file matches when its extension is in FExtV (or FExtV is empty)
// and its base name matches FBaseWc (or FBaseWc is empty); both are
// upper-cased first when CsImpP is false. With RecurseP set, sub-directories
// are traversed through a nested TFFile held in SubFFile.
// NOTE(review): FPathN is pre-incremented before each opendir, so it
// presumably starts at -1 - confirm in the constructor.
// NOTE(review): file names are built as FPathV[FPathN]+FBase with no
// separator added, so paths presumably end with one - confirm with callers.
bool TFFile::Next(TStr& FNm){
  // if need to recurse
  if (!SubFFile.Empty()){
    if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
    else {SubFFile=NULL;}
  }
  // for all required file-paths
  while (FPathN<FPathV.Len()){
    // try to find anything within FPathV[FPathN] directory
    while (true) {
      // if directory not open -> open next first
      if (!FFileDesc->FDesc) {
        if ((++FPathN)<FPathV.Len()) {
          FFileDesc->FDesc = opendir(FPathV[FPathN].CStr());
        } else break;
        if (!FFileDesc->FDesc) break;   // failed to open this one; pass control to outer loop
      }

      FFileDesc->DirEnt = readdir(FFileDesc->FDesc);

      if (FFileDesc->DirEnt) {
        // found something
        TStr FBase = FFileDesc->GetFBase();
        FNm = FPathV[FPathN]+FBase;

        struct stat Stat;
        int ErrCd = stat(FNm.CStr(), &Stat);
        Assert(ErrCd==0); // !bn: assert-with-exception [and elsewhere in this function too]

        if (S_ISREG(Stat.st_mode)) {
          // regular file: apply the extension and base-name filters
          if ((FBase!=".")&&(FBase!="..")){
            TStr FExt=FNm.GetFExt(); if (!CsImpP){FExt.ToUc(); FBase.ToUc();}
            if (((FExtV.Empty())||(FExtV.SearchForw(FExt)!=-1))&&
             ((FBaseWc.Empty())||(FBase.IsWcMatch(FBaseWc)))){
              CurFNm=FNm; CurFNmN++; return true;}
          }
        } else if (S_ISDIR(Stat.st_mode) && RecurseP) {
          // sub-directory: descend with a nested traversal, skipping "." and ".."
          if ((FBase!=".")&&(FBase!="..")){
            TStr SubFPath=FPathV[FPathN]+FBase;
            TStrV SubFPathV; SubFPathV.Add(SubFPath);
            SubFFile=New(SubFPathV, FExtV, FBaseWc, RecurseP);
            if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
            else {SubFFile=NULL;}
          }
        }
      } else {
        // end of directory; clean up (ignore DirEnt, it's allocated within FDesc), pass control to outer loop
        FFileDesc->DirEnt = NULL;
        int ErrCd = closedir(FFileDesc->FDesc);
        FFileDesc->FDesc = NULL;
        Assert(ErrCd==0);
        break;
      }
    }
  }
  // not found
  CurFNm=""; CurFNmN=-1; return false;
}
Example #23
0
// Resets Names and fills it with the key of every attribute entry from
// EdgeHI to the end of the table for which EdgeAttrIsDeleted(EId, ...) is
// false.
void TNEANet::AttrNameEI(const TInt& EId, TStrIntPrH::TIter EdgeHI, TStrV& Names) const {
  Names = TVec<TStr>();
  for (; !EdgeHI.IsEnd(); EdgeHI++) {
    // skip attributes that are deleted for this edge
    if (EdgeAttrIsDeleted(EId, EdgeHI)) { continue; }
    Names.Add(EdgeHI.GetKey());
  }
}
Example #24
0
// copy files for a particular folder info
void TBackupProfile::CopyFolder(const TStr& BaseTargetFolder, const TStr& SourceFolder, const TStrV& Extensions, const TStrV& SkipIfContainingV, const bool& IncludeSubfolders, const bool& ReportP, TStr& ErrMsg)
{
    try {
        // get the name of the source folder
        TStrV PathV; TDir::SplitPath(SourceFolder, PathV);
        EAssert(PathV.Len() > 0);

        // create the folder in the base target folder
        TStr TargetFolder = BaseTargetFolder + PathV[PathV.Len() - 1] + "/";
        if (!TDir::Exists(TargetFolder))
            TDir::GenDir(TargetFolder);

        // find files to be copied
        TStrV FileV;
        TFFile::GetFNmV(SourceFolder, Extensions, false, FileV);

        TStrV FolderV;
        // copy them
        for (int N = 0; N < FileV.Len(); N++) {
            // we found a file
            if (TFile::Exists(FileV[N])) {
                const TStr FileName = TDir::GetFileName(FileV[N]);
                // is this a file that we wish to ignore?
                bool ShouldCopy = true;
                for (int S = 0; S < SkipIfContainingV.Len(); S++) {
                    if (FileName.SearchStr(SkipIfContainingV[S]) >= 0)
                        ShouldCopy = false;
                }
                if (!ShouldCopy)
                    continue;
                const TStr TargetFNm = TargetFolder + FileName;
                if (ReportP)
                    TNotify::StdNotify->OnStatusFmt("Copying file: %s\r", FileName.CStr());
                TFile::Copy(FileV[N], TargetFNm);
            }
            // we found a folder
            else {
                FolderV.Add(FileV[N]);
            }
        }

        if (IncludeSubfolders) {
            for (int N = 0; N < FolderV.Len(); N++)
                CopyFolder(TargetFolder, FolderV[N], Extensions, SkipIfContainingV, IncludeSubfolders, ReportP, ErrMsg);
        }
    }
    catch (PExcept E) {
        if (ErrMsg != "")
            ErrMsg += "\n";
        ErrMsg += "Exception while copying from " + SourceFolder + ": " + E->GetMsgStr();
    }
    catch (...) {
        if (ErrMsg != "")
            ErrMsg += "\n";
        ErrMsg += "Exception while copying from " + SourceFolder + ": " + "Unrecognized exception occured.";
    }
}
Example #25
0
// Resets Values and fills it with GetNodeAttrValue(NId, ...) for every
// attribute entry from NodeHI to the end of the table for which
// NodeAttrIsDeleted(NId, ...) is false.
void TNEANet::AttrValueNI(const TInt& NId , TStrIntPrH::TIter NodeHI, TStrV& Values) const {
  Values = TVec<TStr>();
  for (; !NodeHI.IsEnd(); NodeHI++) {
    // skip attributes that are deleted for this node
    if (NodeAttrIsDeleted(NId, NodeHI)) { continue; }
    Values.Add(GetNodeAttrValue(NId, NodeHI));
  }
}
Example #26
0
// Resets Names and fills it with the key of every attribute entry from
// EdgeHI to the end of the table whose type (Val1) equals FltType and for
// which EdgeAttrIsFltDeleted(EId, ...) is false.
void TNEANet::FltAttrNameEI(const TInt& EId, TStrIntPrH::TIter EdgeHI, TStrV& Names) const {
  Names = TVec<TStr>();
  for (; !EdgeHI.IsEnd(); EdgeHI++) {
    // only float attributes are of interest
    const bool FltAttrP = (EdgeHI.GetDat().Val1 == FltType);
    if (!FltAttrP) { continue; }
    // skip attributes that are deleted for this edge
    if (EdgeAttrIsFltDeleted(EId, EdgeHI)) { continue; }
    Names.Add(EdgeHI.GetKey());
  }
}
Example #27
0
// Resets Names and fills it with the key of every attribute entry from
// NodeHI to the end of the table for which NodeAttrIsDeleted(NId, ...) is
// false.
void TNEANet::AttrNameNI(const TInt& NId, TStrIntPrH::TIter NodeHI, TStrV& Names) const {
  Names = TVec<TStr>();
  for (; !NodeHI.IsEnd(); NodeHI++) {
    // skip attributes that are deleted for this node
    if (NodeAttrIsDeleted(NId, NodeHI)) { continue; }
    Names.Add(NodeHI.GetKey());
  }
}
Example #28
0
void TNEANet::AttrValueEI(const TInt& EId, TStrIntPrH::TIter EdgeHI, TStrV& Values) const {
  Values = TVec<TStr>();
  while (!EdgeHI.IsEnd()) {
    if (!EdgeAttrIsDeleted(EId, EdgeHI)) {
      Values.Add(GetNodeAttrValue(EId, EdgeHI));
    }
    EdgeHI++;
  }  
}
Example #29
0
// Resets Names and fills it with the key of every attribute entry from
// NodeHI to the end of the table whose type (Val1) equals FltType and for
// which NodeAttrIsFltDeleted(NId, ...) is false.
void TNEANet::FltAttrNameNI(const TInt& NId, TStrIntPrH::TIter NodeHI, TStrV& Names) const {
  Names = TVec<TStr>();
  for (; !NodeHI.IsEnd(); NodeHI++) {
    // only float attributes are of interest
    const bool FltAttrP = (NodeHI.GetDat().Val1 == FltType);
    if (!FltAttrP) { continue; }
    // skip attributes that are deleted for this node
    if (NodeAttrIsFltDeleted(NId, NodeHI)) { continue; }
    Names.Add(NodeHI.GetKey());
  }
}
Example #30
0
// Collects the values of all command-line arguments that start with
// PrefixStr (the first occurrence of the prefix is deleted from each).
//  PrefixStr  argument prefix to look for
//  DfValV     default values, returned when no argument matches
//  DNm        descriptive name used in the printed messages
// When Env.GetArgs()<=MnArgs the function is in "usage" mode: it prints the
// prefix, name and defaults (unless SilentP) and returns an empty vector.
// Otherwise it returns the matched values (or DfValV if there were none)
// and, unless SilentP, reports them through TNotify::OnStatus.
TStrV TEnv::GetIfArgPrefixStrV(
    const TStr& PrefixStr, TStrV& DfValV, const TStr& DNm) const {
    TStrV ArgValV;
    if (Env.GetArgs()<=MnArgs) {
        // 'usage' argument message
        if (!SilentP) {
            printf("   %s%s (default:", PrefixStr.CStr(), DNm.CStr());
            for (int DfValN=0; DfValN<DfValV.Len(); DfValN++) {
                if (DfValN>0) {
                    printf(", ");
                }
                printf("'%s'", DfValV[DfValN].CStr());
            }
            printf(")\n");
        }
        return ArgValV;
    } else {
        // gather the values of all arguments carrying the prefix
        for (int ArgN=0; ArgN<GetArgs(); ArgN++) {
            // get argument string
            TStr ArgStr=GetArg(ArgN);
            if (ArgStr.StartsWith(PrefixStr)) {
                // extract & add argument value
                ArgStr.DelStr(PrefixStr);
                ArgValV.Add(ArgStr);
            }
        }
        // fall back to the defaults when nothing matched
        if (ArgValV.Empty()) {
            ArgValV=DfValV;
        }
        // output argument values
        TChA MsgChA;
        MsgChA+="  "+DNm;
        MsgChA+=" (";
        MsgChA+=PrefixStr;
        MsgChA+=")=";
        for (int ArgValN=0; ArgValN<ArgValV.Len(); ArgValN++) {
            if (ArgValN>0) {
                MsgChA+=", ";
            }
            MsgChA+="'";
            MsgChA+=ArgValV[ArgValN];
            MsgChA+="'";
        }
        if (!SilentP) {
            TNotify::OnStatus(Notify, MsgChA);
        }
        return ArgValV;
    }
}