Example #1
void MakeSignEpinions() {
  TSsParser Ss("/u/ana/data/EpinionRatings/user_rating.txt", ssfTabSep);
  //PSignNet Net = TSignNet::New();
  TPt<TNodeEDatNet<TInt, TInt> >  Net = TNodeEDatNet<TInt, TInt>::New();
  TStrHash<TInt> StrSet(Mega(1), true);
      
  while (Ss.Next()) {
    if ( ((TStr)Ss[0]).IsPrefix("#")  )
      continue;
    const int SrcNId = StrSet.AddKey(Ss[0]);
    const int DstNId = StrSet.AddKey(Ss[1]);
    if (! Net->IsNode(SrcNId)) { Net->AddNode(SrcNId); }
    if (! Net->IsNode(DstNId)) { Net->AddNode(DstNId); }
    const int Sign = ((TStr)Ss[2]).GetInt();
    Net->AddEdge(SrcNId, DstNId, Sign);
  }
    
  //    PrintGraphStatTable(Graph, OutFNm, Desc);
  TStr OutFNm = "soc-sign-epinions-user-ratings";
  TStr Desc = "Epinions signed social network";

  // copied from gio.h - line 111
  FILE *F = fopen(OutFNm.CStr(), "wt");
  fprintf(F, "# Directed graph: %s\n", OutFNm.CStr());
  if (! Desc.Empty()) 
    fprintf(F, "# %s\n", (Desc).CStr());
  fprintf(F, "# Nodes: %d Edges: %d\n", Net->GetNodes(), Net->GetEdges());
  fprintf(F, "# FromNodeId\tToNodeId\tSign\n"); 
  for (TNodeEDatNet<TInt,TInt>::TEdgeI ei = Net->BegEI(); ei < Net->EndEI(); ei++) {
      fprintf(F, "%d\t%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId(), ei()());
  }
  fclose(F);
  
  PrintGraphStatTable(Net, OutFNm, Desc);
}
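
The converter above hard-codes its input path and writes a plain tab-separated signed edge list. As a rough sanity check (not part of the original example, and independent of SNAP), one might re-read the emitted file and tally edges by sign; the file name mirrors OutFNm above and is otherwise a placeholder.

// Sketch: re-read the edge list written by MakeSignEpinions() and count
// positive/negative edges, assuming the "FromNodeId\tToNodeId\tSign" layout.
#include <cstdio>

int main() {
  FILE* F = fopen("soc-sign-epinions-user-ratings", "rt");
  if (F == NULL) { printf("output file not found\n"); return 1; }
  char Line[1024];
  int Src, Dst, Sign, Pos = 0, Neg = 0;
  while (fgets(Line, sizeof(Line), F) != NULL) {
    if (Line[0] == '#') { continue; }                       // skip header comments
    if (sscanf(Line, "%d\t%d\t%d", &Src, &Dst, &Sign) == 3) {
      if (Sign > 0) { Pos++; } else { Neg++; }
    }
  }
  fclose(F);
  printf("positive edges: %d  negative edges: %d\n", Pos, Neg);
  return 0;
}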
Example #2
// Slashdot network
void MakeSlashdotNet(TStr InFNm, TStr OutFNm, TStr Desc) {
  TSsParser Ss(InFNm, ssfTabSep);
  PNGraph Graph = TNGraph::New();
  TStrHash<TInt> StrSet(Mega(1), true);
  while (Ss.Next()) {
    const int SrcNId = StrSet.AddKey(Ss[0]);
    if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
    for (int dst = 2; dst < Ss.Len(); dst++) {
      const int DstNId = StrSet.AddKey(Ss[dst]);
      if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
      Graph->AddEdge(SrcNId, DstNId);
    }
  }
  PrintGraphStatTable(Graph, OutFNm, Desc);
}
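
A hypothetical driver for this converter: the input file name and description are placeholders; only the parameter order follows the function above. Each input row is parsed so that column 0 is the source user and columns 2 onward are the users it links to.

// Sketch only: assumes the same SNAP headers as the examples and that
// MakeSlashdotNet() is linked in; "slashdot-links.txt" is a hypothetical input.
int main(int argc, char* argv[]) {
  MakeSlashdotNet("slashdot-links.txt",        // tab-separated adjacency rows
                  "soc-slashdot",              // output name for PrintGraphStatTable
                  "Slashdot social network");  // description
  return 0;
}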
Example #3
void MakeLJNets(TStr InFNm, TStr OutFNm, TStr Desc){
  TStrHash<TInt> StrSet(Mega(1), true);
  for (int i = 1; i < 13; i++){
    TStr tmp = "";  
      
    if (i < 10)
      tmp = InFNm + "ljgraph.0" + TInt::GetStr(i);  
    else
      tmp = InFNm + "ljgraph." + TInt::GetStr(i);  
  
    printf("%s\n",tmp());

    TSsParser Ss(tmp, ssfTabSep);
    PNGraph Graph = TNGraph::New();
      
    while (Ss.Next()) {
      const int SrcNId = StrSet.AddKey(Ss[0]);
      if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
      for (int dst = 2; dst < Ss.Len(); dst++) {
        TStr ls, rs;
        ((TStr)Ss[dst]).SplitOnCh(ls, ' ', rs);
        if (ls == ">"){
          const int DstNId = StrSet.AddKey(rs);
          if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
          Graph->AddEdge(SrcNId, DstNId);
        }
      }
    }
    if (i < 10)
      OutFNm = "soc-lj-friends.0"+TInt::GetStr(i);
    else
      OutFNm = "soc-lj-friends."+TInt::GetStr(i);
  
    PrintGraphStatTable(Graph, OutFNm, Desc);
  }
}
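
Incidentally, the zero-padding branches for the month index could be collapsed with TStr::Fmt, which Example #6 below also uses; a small sketch under that assumption (behavior intended to match the if/else above for i = 1..12):

// Sketch: build the per-month input name with printf-style zero padding.
// Same SNAP headers as the examples; MonthGraphFNm is a hypothetical helper.
static TStr MonthGraphFNm(const TStr& InFNm, const int i) {
  return TStr::Fmt("%sljgraph.%02d", InFNm.CStr(), i);  // e.g. ".../ljgraph.01"
}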
Example #4
void MakeLJGroupsNets(const TStr InFNm, TStr OutFNm, TStr Desc){
  TStrHash<TInt> StrSetU(Mega(1), true);
  TStrHash<TInt> StrSetG(Mega(1), true);
  
  for (int i = 1; i < 13; i++){
    TStr tmp = "";  
      
    if (i < 10)
      tmp = InFNm + "ljgraph.0" + TInt::GetStr(i);  
    else
      tmp = InFNm + "ljgraph." + TInt::GetStr(i);  
  
    printf("%s\n",tmp());

    TSsParser Ss(tmp, ssfTabSep);
    PNGraph Graph = TNGraph::New();
      
    while (Ss.Next()) {
      const int SrcNId = StrSetU.AddKey(Ss[0]);
      if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
      for (int dst = 2; dst < Ss.Len(); dst++) {
        TStr ls, rs;
        ((TStr)Ss[dst]).SplitOnCh(ls, ' ', rs);
        if (ls == ">"){
          const int DstNId = StrSetU.AddKey(rs);
          if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
          Graph->AddEdge(SrcNId, DstNId);
        }
      }
    }
    if (i < 10)
      OutFNm = "soc-lj-friends.0"+TInt::GetStr(i);
    else
      OutFNm = "soc-lj-friends."+TInt::GetStr(i);
  
  //  PrintGraphStatTable(Graph, OutFNm, Desc);
  }
  
  for (int i = 5; i < 14; i++){
    TStr tmp = "";       
    
    tmp = InFNm + "ljcomm." + TInt::GetStr(i);  
    printf("%s\n",tmp());
    
    PNGraph Graph1 = TNGraph::New();
    PNGraph Graph2 = TNGraph::New();
  
    TSsParser Ss(tmp, ssfTabSep);    
    while (Ss.Next()) {
      const int SrcNId = StrSetU.AddKey(Ss[0]);
      if (! Graph1->IsNode(SrcNId)) { Graph1->AddNode(SrcNId); }
      if (! Graph2->IsNode(SrcNId)) { Graph2->AddNode(SrcNId); }
      
      for (int dst = 2; dst < Ss.Len(); dst++) {   
        TStr ls,rs;
        ((TStr)Ss[dst]).SplitOnCh(ls,' ',rs);            
        const int DstNId = StrSetG.AddKey(rs) + 10000000;
        if (! Graph1->IsNode(DstNId)) { Graph1->AddNode(DstNId); }
        if (! Graph2->IsNode(DstNId)) { Graph2->AddNode(DstNId); }
        if (ls == "<") // member
          Graph1->AddEdge(SrcNId, DstNId);  
        else if (ls == ">") // watching
          Graph2->AddEdge(SrcNId, DstNId);      
      }
    }
    OutFNm = "soc-lj-comm-";
    TStr s = "";
    if (i < 10)
      s = "0";
    
    PrintGraphStatTable(Graph1, OutFNm+"members-" + s + TInt::GetStr(i), Desc+" communities - members");
    PrintGraphStatTable(Graph2, OutFNm+"watchers-"+ s + TInt::GetStr(i), Desc+" communities - watchers");
  }
}
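
Both community graphs rely on the +10000000 offset keeping group node ids disjoint from user node ids. A defensive check one might add (hypothetical, not in the original) is to assert that the user key space never reaches that offset:

// Sketch: StrSetU hands out user keys 0..Len()-1 and group nodes start at
// GroupIdOffset, so the two id ranges stay disjoint as long as this holds.
static void CheckIdSpaces(const TStrHash<TInt>& StrSetU) {
  const int GroupIdOffset = 10000000;       // same constant as used above
  IAssert(StrSetU.Len() <= GroupIdOffset);  // user ids never enter the group range
}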
Example #5
// Parse files downloaded from IMDB. Actors point to movies.
// Files: actors.list.gz, language.list.gz, countries.list.gz
PImdbNet TImdbNet::LoadFromImdb(const TStr& DataDir) {
  PImdbNet Net = TImdbNet::New();
  Net->Reserve((int) Mega(2.5), -1);
  // ACTORS
  { TSsParser Ss(DataDir+"\\actors.list.gz", ssfTabSep);
  while (Ss.Next() && strcmp(Ss[0],"THE ACTORS LIST")!=0) { }
  Ss.Next();  Ss.Next();  Ss.Next();  
  int ActorId = -1, NAct=0;
  for (int i = 0; Ss.Next(); i++) {
    //printf("%s\n", Ss.DumpStr());
    int mPos = 0;
    if (Ss.Len() > 1) { // actor
      ActorId = Net->AddStr(Ss[0]);
      if (Net->IsNode(ActorId)) {
        printf("  actor '%s'(%d) is already a node %s\n", Ss[0], 
          ActorId, TImdbNet::GetMovieTyStr((TMovieTy) Net->GetNDat(ActorId).Type.Val).CStr());
        continue;
      } else { Net->AddNode(ActorId); }
      TImdbNode& Node = Net->GetNDat(ActorId);
      Node.Name = ActorId;  NAct++;
      Node.Type = mtyActor;
      mPos = 1;  while (strlen(Ss[mPos])==0) { mPos++; }
    } 
    // movie (delete everything last [)
    //  also parse the position <>, but is a property of an edge (actor, movie) pair
    char *DelFrom;
    char *C1 = strrchr(Ss[mPos], '<');
    char *C2 = strrchr(Ss[mPos], '[');
    if (C1==NULL) { DelFrom=C2; }
    else if (C2==NULL) { DelFrom=C1; }
    else { DelFrom = TMath::Mn(C1, C2); }
    if (DelFrom != NULL) {
      char *mov = DelFrom;  while (*mov) { *mov=0; mov++; }
      mov = (char *) DelFrom-1;  while (TCh::IsWs(*mov)) { *mov=0; mov--; }
    }
    const int MovNId = Net->AddStr(Ss[mPos]);
    if (! Net->IsNode(MovNId)) {
      Net->AddNode(MovNId);
      TImdbNode& Node = Net->GetNDat(MovNId);
      Node.Type = mtyMovie;  Node.Year = GetYearFromTitle(Ss[mPos]);
    }
    if (Net->IsEdge(ActorId, MovNId)) {
      printf("  already an edge %d %d\n", ActorId, MovNId); }
    else { Net->AddEdge(ActorId, MovNId); }
    if ((i+1) % Kilo(10) == 0) { 
      printf("\r   %d", (i+1)/1000); 
      if ((i+1) % Kilo(100) == 0) { 
        printf(" nodes: %d, edges: %d, actors: %d", 
          Net->GetNodes(), Net->GetEdges(), NAct); }
    }
  } 
  printf("\n=== nodes: %d, edges: %d, actors: %d", Net->GetNodes(), Net->GetEdges(), NAct); }
  // MOVIE LANGUAGE
  { TSsParser Ss(DataDir+"\\language.list.gz", ssfTabSep);
  while (Ss.Next() && strcmp(Ss[0],"LANGUAGE LIST")!=0) { }
  Ss.Next();
  int LangCnt=0, i;
  for (i = 0; Ss.Next(); i++) {
    char *Mov = Ss[0];
    char *Lang = Ss[Ss.Len()-1];
    if (Net->IsStr(Mov)) {
      const int NId = Net->GetStrId(Mov);
      Net->GetNDat(NId).Lang = Net->AddStr(Lang);
      LangCnt++;
    } //else { printf("movie not found: '%s'\n", Mov); }
    if ((i+1) % Kilo(10) == 0) { 
      printf("\r   %d found %d ", (i+1), LangCnt); }
  } 
  printf("\n  LANG: total movies: %d,  found %d\n", (i+1), LangCnt); }
  // MOVIE COUNTRY
  { TSsParser Ss(DataDir+"\\countries.list.gz", ssfTabSep);
  while (Ss.Next() && strcmp(Ss[0],"COUNTRIES LIST")!=0) { }
  Ss.Next();
  int LangCnt=0, i;
  for (i = 0; Ss.Next(); i++) {
    char *Mov = Ss[0];
    char *Cntry = Ss[Ss.Len()-1];
    if (Net->IsStr(Mov)) {
      const int NId = Net->GetStrId(Mov);
      Net->GetNDat(NId).Cntry = Net->AddStr(Cntry);
      LangCnt++;
    } //else { printf("country not found: '%s'\n", Mov); }
    if ((i+1) % Kilo(10) == 0) { 
      printf("\n   %d found %d ", (i+1), LangCnt); }
  } 
  printf("\r  CNTRY: total movies: %d,  found %d\n", (i+1), LangCnt);  }
  return Net;
}
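
A minimal way to exercise the loader (the data directory is a placeholder; GetNodes()/GetEdges() are the same accessors the function already prints):

// Sketch: load the IMDB network and report its size. Assumes the same headers
// as above; "imdb-data" is a hypothetical directory holding the .list.gz files.
int main(int argc, char* argv[]) {
  PImdbNet Net = TImdbNet::LoadFromImdb("imdb-data");
  printf("nodes: %d  edges: %d\n", Net->GetNodes(), Net->GetEdges());
  return 0;
}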
Example #6
void BigMain(int argc, char* argv[]) {
  TExeTm ExeTm;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs("QuotesApp");
  const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
  if (Env.IsEndOfRun()) {
    printf("To do:\n");
    printf("    MkDataset         : Make memes dataset (extract quotes and save txt)\n");
    printf("    ExtractSubset     : Extract a subset of memes containing particular words\n");
    printf("    MemesToQtBs       : Load memes dataset and create quote base\n");
    printf("    MkClustNet        : Build cluster network from the quote base\n");
    return;
  }	
#pragma region mkdataset
  // extract quotes and links and make them into a single file
  if (ToDo == "mkdataset") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
    const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
    const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
    const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
    //// parse directly from Spinn3r
    TStr Spinn3rFNm;
    THashSet<TMd5Sig> SeenUrlSet;
    if (UrlOnlyOnce && ! UrlFNm.Empty()) {  // keep track of already seen urls (so that there are no duplicate urls)
      TFIn FIn(UrlFNm);  SeenUrlSet.Load(FIn);
    }
    FILE *F = fopen(OutFNm.CStr(), "wt");
    TFIn FIn(InFNm);
    int Items=0;
    for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
      TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
      printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
      fflush(stdout);
      for (int item = 0; QE.Next(); item++) {
        const TMd5Sig PostMd5(QE.PostUrlStr);
        if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
        if (UrlOnlyOnce) {
          if (SeenUrlSet.IsKey(PostMd5)) { continue; }
          SeenUrlSet.AddKey(PostMd5);
        }
        fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
        //if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
        fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
        for (int q = 0; q < QE.QuoteV.Len(); q++) {
          if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
            fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
        }
        for (int l = 0; l < QE.LinkV.Len(); l++) {
          fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
        fprintf(F, "\n");
        if (item>0 && item % Kilo(100) == 0) {
          QE.DumpStat();  QE.ExeTm.Tick(); }
        Items++;
      }
      printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
      fflush(stdout);
    }
    printf("all done. Saving %d post urls\n", SeenUrlSet.Len());  fflush(stdout);
    if (! SeenUrlSet.Empty()) {
      TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
      SeenUrlSet.Save(FOut);
    }
    fclose(F);
  }
#pragma endregion mkdataset

#pragma region extractsubset
  // save posts with memes containing particular words
  else if (ToDo == "extractsubset") {
    const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
    const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
    const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");

    TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
    printf("Loading %s\n", WordsFNm.CStr());
    { TFIn FIn(WordsFNm);
    for (TStr Ln; FIn.GetNextLn(Ln); ) {
      printf("  %s\n", Ln.GetLc().CStr());
      CatchMemeV.Add(Ln.GetLc()); }
    }
    printf("%d strings loaded\n", CatchMemeV.Len());
    TFOut FOut(OutFNm);
    TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
    for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
      bool DoSave = false;
      for (int m = 0; m < Memes.MemeV.Len(); m++) {
        for (int i = 0; i < CatchMemeV.Len(); i++) {
          if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
            DoSave=true; break; }
        }
        if (DoSave) { break; }
      }
      if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
      if (posts % Mega(1) == 0) {
        printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
        FOut.Flush();
      }
    }
  }
#pragma endregion extractsubset

#pragma region memestoqtbs
  // load memes dataset (MkDataset) and create quote base
  else if (ToDo == "memestoqtbs") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201007_201107.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "File with news media urls");
    const TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-l:", 4, "Min quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");
		const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20100714", "Min time of quotes, format = YYYYMMDD");
		const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20110728", "Max time of quotes, format = YYYYMMDD");
		TSecTm MinTm(atoi(MinTmStr.GetSubStr(0,3).CStr()),atoi(MinTmStr.GetSubStr(4,5).CStr()),atoi(MinTmStr.GetSubStr(6,7).CStr()));
		TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0,3).CStr()),atoi(MaxTmStr.GetSubStr(4,5).CStr()),atoi(MaxTmStr.GetSubStr(6,7).CStr()));

		PQuoteBs QtBs = TQuoteBs::New();
		int HashTableSize = 100; // 100 for each quarter, for one year data, use 400
		int UrlSetSize = 4 * HashTableSize;
		QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
		}
#pragma endregion memestoqtbs

#pragma region mkclustnet
  // make cluster network
  else if (ToDo == "mkclustnet") {
    TStr InQtBsNm = Env.GetIfArgPrefixStr("-i:", "", "Input quote base file name");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output network/updated QtBs filename");
		TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
		bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
		bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
		double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
		double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
		double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
		double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
		const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-mf:", 5, "Min meme frequency");
		const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");

		// Load quote base
    PQuoteBs QtBs;
    if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
    else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }

		// Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

		// Dump the clusters
		bool SkipUrl = true, FlashDisp = true;
		QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
  }
#pragma endregion mkclustnet

#pragma region memeclust
	else if (ToDo.SearchStr(TStr("memeclust")) >= 0) {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201101.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "File with news media urls");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
		const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
		const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");

		const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");
		TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
		bool IsQtBsReady = Env.GetIfArgPrefixBool("-qtbsready:", false, "Indicate whether quote base is ready and can be loaded readily");
		bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
		bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
		double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
		double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
		double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
		double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");

		const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20010101", "Min time of quotes, format = YYYYMMDD");
		const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20200101", "Max time of quotes, format = YYYYMMDD");
		TSecTm MinTm(atoi(MinTmStr.GetSubStr(0,3).CStr()),atoi(MinTmStr.GetSubStr(4,5).CStr()),atoi(MinTmStr.GetSubStr(6,7).CStr()));
		TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0,3).CStr()),atoi(MaxTmStr.GetSubStr(4,5).CStr()),atoi(MaxTmStr.GetSubStr(6,7).CStr()));

		// Construct the quote base from Zarya data
		PQuoteBs QtBs = TQuoteBs::New();

		if (!IsQtBsReady) {
			int HashTableSize = 100; // 100 for each quarter, for one year data, use 400
			if (ToDo == "memeclustzarya") {
				int UrlSetSize = 4 * HashTableSize;
				QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
			}	else if (ToDo == "memeclustqtonly") {
				QtBs->ConstructQtBsQtOnly(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
			} else if (ToDo == "memeclustqttime") {
				QtBs->ConstructQtBsQtTime(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
			} else {
				printf("Please specify one of the three options for -do : memeclustzarya, memeclustqtonly, memeclustqttime!\n");
				return;
			}
		} else {
			TStr InQtBsNm = TStr::Fmt("%s-w%dmfq%d.QtBs", Pref.CStr(), MinWrdLen, MinMemeFq);
			if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
			else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }
		}

		// Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

		// Dump the clusters
		bool SkipUrl = true, FlashDisp = true;
		QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
	}
#pragma endregion memeclust
}
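
A hypothetical entry point for the application; the command lines in the comment only recombine the -do:/-i:/-o:/-u:/-w: arguments and default values parsed above.

// Sketch: forward the command line to BigMain(). Example invocations (file
// names are the defaults shown above; the binary name is a placeholder):
//   quotesapp -do:mkdataset   -i:files.txt -o:Spinn3r-dataset.txt -w:3
//   quotesapp -do:memestoqtbs -i:201007_201107.txt -u:news_media.txt -o:qt
int main(int argc, char* argv[]) {
  BigMain(argc, argv);
  return 0;
}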