PAlignPair TAlignPair::LoadAcXml(const TStr& FNm, const int& MxSents) {
    printf("Loading %s ...\n", FNm.CStr());
    // get the lanugagne names
    TStr BaseNm = FNm.GetFMid();
    TStrV PartV; BaseNm.SplitOnAllCh('-', PartV);
    IAssertR(PartV.Len() == 3, "Bad file name: " + BaseNm);
    // prepare aligne pair
    PAlignPair AlignPair = TAlignPair::New(PartV[1], PartV[2]);
    // parse the XML
    PTransCorpus TransCorpus = TTransCorpus::LoadAC(FNm, MxSents * 4);
    // select subset of sentences which will go into aligned corpus
    const int AllSents = TransCorpus->GetSentences();
    TIntV SentIdV(AllSents, 0);
    for (int SentId = 0; SentId < AllSents; SentId++) {
        SentIdV.Add(SentId); 
    }
    if (MxSents != -1 && AllSents > MxSents) {
    	TRnd Rnd(1);
        SentIdV.Shuffle(Rnd);
        SentIdV.Trunc(MxSents);
    }
    // add the sentences to the bow
    const int Sents = SentIdV.Len();
    for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        const TStr& Sent1 = TransCorpus->GetOrgStr(SentId);
        const TStr& Sent2 = TransCorpus->GetRefTransStrV(SentId)[0];
        AlignPair->AddSent(Sent1, Sent2);
    }
    // finish the alignment pair
    AlignPair->Def();
    return AlignPair;
}
Example #2
0
void DoLayout(const TStr& GraphInFNm, TStr OutFNm, const TGVizLayout& Layout) {
  TStr LayoutExe = GetLayoutStr(Layout), Ext = OutFNm.GetFExt(), GvPath;
  #if defined(GLib_WIN)
    GvPath = "C:\\Prog\\GraphViz\\bin\\";
  #else
    GvPath = "/usr/bin/";
    Ext = ".ps";
    OutFNm = OutFNm.GetFMid() + Ext;
  #endif
  IAssert(Ext==".ps" || Ext==".gif" || Ext==".png");
  const TStr ExeCmd = TStr::Fmt("%s -T%s %s -o %s", LayoutExe.CStr(),
    Ext.CStr()+1, GraphInFNm.CStr(), OutFNm.CStr());

  if (system(ExeCmd.CStr())==0) { return; }
  #if defined(GLib_WIN)
  if (system(TStr::Fmt(".\\%s", ExeCmd.CStr()).CStr())==0) { return; }
  #else
  if (system(TStr::Fmt("./%s", ExeCmd.CStr()).CStr())==0) { return; }
  #endif
  if (system(TStr::Fmt("%s%s", GvPath.CStr(), ExeCmd.CStr()).CStr())==0) { return; }
  fprintf(stderr, "[%s:%d] Cat not find GraphViz (%s). Set the PATH.\n", __FILE__, __LINE__, ExeCmd.CStr());
  //#if defined(GLib_WIN)
  //if (ShowPlot) system(TStr::Fmt("start %s", OutFNm.CStr()).CStr());
  //#endif
}
Example #3
0
		// parse from script filename, assuming default structure
		TJsParam(const TStr& RootFNm, const TStr& _FNm) {
			// remember script name
			FNm = _FNm;
			// derive namespace from filename
			Nm = FNm.GetFMid();
			// no initialization parameters
			InitVal = TJsonVal::NewObj();
			// handle default includes
			AddLocalLibFPath();
			AddQMinerLibFPath();
			// add sandbox access
			AddSandboxAccessFPath(RootFNm);
		}
Example #4
0
void TGnuPlot::SavePng(const TStr& FNm, const int& SizeX, const int& SizeY, const TStr& Comment, const TStr& Terminal) {
  if (Terminal.Empty()) {
    //#ifdef GLib_WIN
    #ifndef GLib_MACOSX  // The standard GNUPlot for MacOS does not support PNG
    AddCmd(TStr::Fmt("set terminal png small size %d,%d", SizeX, SizeY));
    AddCmd(TStr::Fmt("set output '%s'", FNm.CStr()));
    #else // EPS
    AddCmd("set terminal postscript eps 10 enhanced color");
    AddCmd(TStr::Fmt("set output '%s.eps'", FNm.GetFMid().CStr()));
    #endif
  } else {
    AddCmd(Terminal);
    AddCmd(TStr::Fmt("set output '%s'", FNm.CStr()));
  }
  Pause(false);
  CreatePlotFile(Comment.Empty()? Title : Comment);
  RunGnuPlot();
  MoreCmds.DelLast();
  MoreCmds.DelLast();
}
void TTb::SaveAssis(const TStr& FNm){
  TStr DoFNm=FNm.GetFPath()+"AsDo"+FNm.GetFMid().GetSubStr(0, 3)+".Dat";
  TStr DaFNm=FNm.GetFPath()+"AsDa"+FNm.GetFMid().GetSubStr(0, 3)+".Dat";

  TOLx DoLx(PSOut(new TFOut(DoFNm)), TFSet()|oloFrcEoln|oloSigNum|oloUniStr);
  int Dscs=GetVar(0)->GetVarType()->GetDscs(); Assert(Dscs>0);
  DoLx.PutInt(Dscs); DoLx.PutDosLn();
  for (int DscN=0; DscN<Dscs; DscN++){
    TTbVal Val=GetVar(0)->GetVarType()->GetVal(DscN);
    DoLx.PutStr(GetVar(0)->GetVarType()->GetValStr(Val)); DoLx.PutDosLn();
  }
  DoLx.PutInt(TInt(GetVars()-1)); DoLx.PutDosLn();
  for (int VarN=1; VarN<GetVars(); VarN++){
    DoLx.PutStr(GetVar(VarN)->GetNm()); DoLx.PutDosLn();
    int Dscs=GetVar(VarN)->GetVarType()->GetDscs();
    if (Dscs>0){
      DoLx.PutInt(Dscs); DoLx.PutDosLn();
      for (int DscN=0; DscN<Dscs; DscN++){
        TTbVal Val=GetVar(VarN)->GetVarType()->GetVal(DscN);
        DoLx.PutStr(GetVar(VarN)->GetVarType()->GetValStr(DscN)); DoLx.PutDosLn();}
    } else {
      DoLx.PutInt(TInt(0)); DoLx.PutInt(TInt(100)); DoLx.PutDosLn();
    }
  }

  TOLx DaLx(PSOut(new TFOut(DaFNm)), TFSet()|oloFrcEoln|oloSigNum|oloUniStr);
  for (int TupN=0; TupN<GetTups(); TupN++){
    for (int VarN=0; VarN<GetVars(); VarN++){
      TTbVal Val=GetVal(TupN, VarN);
      switch (Val.GetValTag()){
        case tvtUnknw: DaLx.PutSym(syQuestion); break;
        case tvtUnimp: DaLx.PutSym(syAsterisk); break;
        case tvtUnapp: DaLx.PutSym(syAsterisk); break;
        case tvtDsc: DaLx.PutInt(TInt(1+Val.GetDsc())); break;
        case tvtFlt: DaLx.PutFlt(Val.GetFlt()); break;
        default: Fail;
      }
    }
    DaLx.PutDosLn();
  }
}
Example #6
0
		// parse from json configuration file
		TJsParam(const TStr& RootFNm, const PJsonVal& JsVal) { 
			EAssertR(JsVal->IsObj(), "Unsupported type: " + TJsonVal::GetStrFromVal(JsVal));
			// we must have at least the script name
			EAssert(JsVal->IsObjKey("file"));
			// get script name				
			FNm = JsVal->GetObjStr("file");
			// get namespace (get from script name if not available)
			Nm = JsVal->IsObjKey("name") ? JsVal->GetObjStr("name") : FNm.GetFMid();
			// get initialization parameters (if available)
			InitVal = JsVal->IsObjKey("init") ? JsVal->GetObjKey("init") : TJsonVal::NewObj();
			// get library include folders
			if (JsVal->IsObjKey("include")) { 
				PJsonVal IncludesVal = JsVal->GetObjKey("include");
				EAssertR(IncludesVal->IsArr(), "Expected array of strings, not: " + TJsonVal::GetStrFromVal(IncludesVal));
				for (int IncludeN = 0; IncludeN < IncludesVal->GetArrVals(); IncludeN++) {
					PJsonVal IncludeVal = IncludesVal->GetArrVal(IncludeN);
					EAssertR(IncludeVal->IsStr(), "Expected string, not: " + TJsonVal::GetStrFromVal(IncludeVal));						
					IncludeFPathV.Add(IncludeVal->GetStr());
				}
			}
			// handle default includes
			AddLocalLibFPath();
			AddQMinerLibFPath();
			// get folders with access permissions
			if (JsVal->IsObjKey("dirs")) { 
				PJsonVal DirsVal = JsVal->GetObjKey("dirs");
				EAssertR(DirsVal->IsArr(), "Expected array of strings, not: " + TJsonVal::GetStrFromVal(DirsVal));
				for (int DirN = 0; DirN < DirsVal->GetArrVals(); DirN++) {
					PJsonVal DirVal = DirsVal->GetArrVal(DirN);
					EAssertR(DirVal->IsStr(), "Expected string, not: " + TJsonVal::GetStrFromVal(DirVal));
					AccessFPathV.Add(DirVal->GetStr());
				}
			}
			// add sandbox access
			AddSandboxAccessFPath(RootFNm);
		}
Example #7
0
TStr TZipOut::GetCmd(const TStr& ZipFNm) {
  if (FExtToCmdH.Empty()) FillFExtToCmdH();
  const TStr Ext = ZipFNm.GetFExt().GetLc();
  EAssertR(FExtToCmdH.IsKey(Ext), TStr::Fmt("Unsupported file extension '%s'", Ext.CStr()));
  return FExtToCmdH.GetDat(Ext)+ZipFNm.GetFMid();
}
Example #8
0
int main(int argc, char* argv[]) {
	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(
			TStr::Fmt("Kronecker graphs. build: %s, %s. Time: %s", __TIME__,
					__DATE__, TExeTm::GetCurTm()));
	TExeTm ExeTm;
	Try
		Env = TEnv(argc, argv, TNotify::StdNotify);
		const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt",
				"Input graph file (single directed edge per line)");
		TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix");
		const TInt NZero = Env.GetIfArgPrefixInt("-n0:", 2,
				"Innitiator matrix size");
		const TStr InitMtx = Env.GetIfArgPrefixStr("-m:", "0.9 0.7; 0.5 0.2",
				"Init Gradient Descent Matrix (R=random)").GetLc();
		const TStr Perm =
				Env.GetIfArgPrefixStr("-p:", "d",
						"Initial node permutation: d:Degree, r:Random, o:Order").GetLc();
		const TInt GradIter = Env.GetIfArgPrefixInt("-gi:", 50,
				"Gradient descent iterations");
		const TFlt LrnRate = Env.GetIfArgPrefixFlt("-l:", 1e-5,
				"Learning rate");
		const TFlt MnStep = Env.GetIfArgPrefixFlt("-mns:", 0.005,
				"Minimum gradient step");
		const TFlt MxStep = Env.GetIfArgPrefixFlt("-mxs:", 0.05,
				"Maximum gradient step");
		const TInt WarmUp = Env.GetIfArgPrefixInt("-w:", 10000,
				"Samples to warm up");
		const TInt NSamples = Env.GetIfArgPrefixInt("-s:", 100000,
				"Samples per gradient estimation");
		//const TInt GradType = Env.GetIfArgPrefixInt("-gt:", 1, "1:Grad1, 2:Grad2");
		const bool ScaleInitMtx = Env.GetIfArgPrefixBool("-sim:", true,
				"Scale the initiator to match the number of edges");
		const TFlt PermSwapNodeProb =
				Env.GetIfArgPrefixFlt("-nsp:", 1.0,
						"Probability of using NodeSwap (vs. EdgeSwap) MCMC proposal distribution");
		if (OutFNm.Empty()) {
			OutFNm = TStr::Fmt("%s-fit%d", InFNm.GetFMid().CStr(), NZero());
		}
		// load graph
		PNGraph G;
		if (InFNm.GetFExt().GetLc() == ".ungraph") {
			TFIn FIn(InFNm);
			G = TSnap::ConvertGraph<PNGraph>(TUNGraph::Load(FIn), true);
		} else if (InFNm.GetFExt().GetLc() == ".ngraph") {
			TFIn FIn(InFNm);
			G = TNGraph::Load(FIn);
		} else {
			G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1);
		}
		// fit
		TKronMtx InitKronMtx =
				InitMtx == "r" ?
						TKronMtx::GetRndMtx(NZero, 0.1) :
						TKronMtx::GetMtx(InitMtx);
		InitKronMtx.Dump("INIT PARAM", true);
		TKroneckerLL KronLL(G, InitKronMtx, PermSwapNodeProb);
		if (ScaleInitMtx) {
			InitKronMtx.SetForEdges(G->GetNodes(), G->GetEdges());
		}
		KronLL.InitLL(G, InitKronMtx);
		InitKronMtx.Dump("SCALED PARAM", true);
		KronLL.SetPerm(Perm.GetCh(0));
		double LogLike = 0;
		//if (GradType == 1) {
		LogLike = KronLL.GradDescent(GradIter, LrnRate, MnStep, MxStep, WarmUp,
				NSamples);
		//} else if (GradType == 2) {
		//  LogLike = KronLL.GradDescent2(GradIter, LrnRate, MnStep, MxStep, WarmUp, NSamples); }
		//else{ Fail; }
		const TKronMtx& FitMtx = KronLL.GetProbMtx();
		FILE *F = fopen(OutFNm.CStr(), "w");
		fprintf(F, "Input\t%s\n", InFNm.CStr());
		TStrV ParamV;
		Env.GetCmLn().SplitOnAllCh(' ', ParamV);
		fprintf(F, "Command line options\n");
		for (int i = 0; i < ParamV.Len(); i++) {
			fprintf(F, "\t%s\n",
					ParamV[i].CStr() + (ParamV[i][0] == '-' ? 1 : 0));
		}
		fprintf(F, "Loglikelihood\t%10.2f\n", LogLike);
		fprintf(F, "Absolute error (based on expected number of edges)\t%f\n",
				KronLL.GetAbsErr());
		fprintf(F, "RunTime\t%g\n", ExeTm.GetSecs());
		fprintf(F, "Estimated initiator\t%s\n", FitMtx.GetMtxStr().CStr());
		fclose(F);

	Catch
	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(),
			TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Example #9
0
// width=0.3, height=0.3, label=\"\", style=filled, color=black
void TGraphKey::PlotGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const {
  const TStr DotFNm = OutFNm.GetFMid()+".dot";
  SaveGViz(DotFNm, Desc, NodeAttrs, Size);
  TGraphViz::DoLayout(DotFNm, OutFNm, gvlDot);
}
Example #10
0
void BigMain(int argc, char* argv[]) {
  TExeTm ExeTm;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs("QuotesApp");
  const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
  if (Env.IsEndOfRun()) {
    printf("To do:\n");
    printf("    MkDataset         : Make memes dataset (extract quotes and save txt)\n");
    printf("    ExtractSubset     : Extract a subset of memes containing particular words\n");
    printf("    MemesToQtBs       : Load memes dataset and create quote base\n");
    printf("    MkClustNet        : Build cluster network from the quote base\n");
    return;
  }	
#pragma region mkdataset
  // extract quotes and links and make them into a single file
  if (ToDo == "mkdataset") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
    const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
    const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
    const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
    //// parse directly from Spinn3r
    TStr Spinn3rFNm;
    THashSet<TMd5Sig> SeenUrlSet;
    if (UrlOnlyOnce && ! UrlFNm.Empty()) {  // keep track of already seen urls (so that there are no duplicate urls)
      TFIn FIn(UrlFNm);  SeenUrlSet.Load(FIn);
    }
    FILE *F = fopen(OutFNm.CStr(), "wt");
    TFIn FIn(InFNm);
    int Items=0;
    for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
      TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
      printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
      fflush(stdout);
      for (int item = 0; QE.Next(); item++) {
        const TMd5Sig PostMd5(QE.PostUrlStr);
        if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
        if (UrlOnlyOnce) {
          if (SeenUrlSet.IsKey(PostMd5)) { continue; }
          SeenUrlSet.AddKey(PostMd5);
        }
        fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
        //if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
        fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
        for (int q = 0; q < QE.QuoteV.Len(); q++) {
          if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
            fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
        }
        for (int l = 0; l < QE.LinkV.Len(); l++) {
          fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
        fprintf(F, "\n");
        if (item>0 && item % Kilo(100) == 0) {
          QE.DumpStat();  QE.ExeTm.Tick(); }
        Items++;
      }
      printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
      fflush(stdout);
    }
    printf("all done. Saving %d post urls\n", SeenUrlSet.Len());  fflush(stdout);
    if (! SeenUrlSet.Empty()) {
      TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
      SeenUrlSet.Save(FOut);
    }
    fclose(F);
  }
#pragma endregion mkdataset

#pragma region extractsubset
  // save posts with memes containing particular words
  else if (ToDo == "extractsubset") {
    const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
    const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
    const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");

    TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
    printf("Loading %s\n", WordsFNm.CStr());
    { TFIn FIn(WordsFNm);
    for (TStr Ln; FIn.GetNextLn(Ln); ) {
      printf("  %s\n", Ln.GetLc().CStr());
      CatchMemeV.Add(Ln.GetLc()); }
    }
    printf("%d strings loaded\n", CatchMemeV.Len());
    TFOut FOut(OutFNm);
    TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
    for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
      bool DoSave = false;
      for (int m = 0; m < Memes.MemeV.Len(); m++) {
        for (int i = 0; i < CatchMemeV.Len(); i++) {
          if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
            DoSave=true; break; }
        }
        if (DoSave) { break; }
      }
      if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
      if (posts % Mega(1) == 0) {
        printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
        FOut.Flush();
      }
    }
  }
#pragma endregion extractsubset

#pragma region memestoqtbs
  // load memes dataset (MkDataset) and create quote base
  else if (ToDo == "memestoqtbs") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201007_201107.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "Fule with news media urls");
    const TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-l:", 4, "Min quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");
		const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20100714", "Min time of quotes, format = YYYYMMDD");
		const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20110728", "Max time of quotes, format = YYYYMMDD");
		TSecTm MinTm(atoi(MinTmStr.GetSubStr(0,3).CStr()),atoi(MinTmStr.GetSubStr(4,5).CStr()),atoi(MinTmStr.GetSubStr(6,7).CStr()));
		TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0,3).CStr()),atoi(MaxTmStr.GetSubStr(4,5).CStr()),atoi(MaxTmStr.GetSubStr(6,7).CStr()));

		PQuoteBs QtBs = TQuoteBs::New();
		int HashTableSize = 100; // 100 for each quarter, for one year data, use 400
		int UrlSetSize = 4 * HashTableSize;
		QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
		}
#pragma endregion memestoqtbs

#pragma region mkclustnet
  // make cluster network
  else if (ToDo == "mkclustnet") {
    TStr InQtBsNm = Env.GetIfArgPrefixStr("-i:", "", "Input quote base file name");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output network/updated QtBs filename");
		TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
		bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
		bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
		double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
		double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
		double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
		double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
		const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-mf:", 5, "Min meme frequency");
		const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");

		// Load quote base
    PQuoteBs QtBs;
    if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
    else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }

		// Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

		// Dump the clusters
		bool SkipUrl = true, FlashDisp = true;
		QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
  }
#pragma endregion mkclustnet

#pragma region memeclust
	else if (ToDo.SearchStr(TStr("memeclust")) >= 0) {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201101.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "Fule with news media urls");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
		const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
		const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");

		const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");
		TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
		bool IsQtBsReady = Env.GetIfArgPrefixBool("-qtbsready:", false, "Indicate whether quote base is ready and can be loaded readily");
		bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
		bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
		double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
		double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
		double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
		double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");

		const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20010101", "Min time of quotes, format = YYYYMMDD");
		const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20200101", "Max time of quotes, format = YYYYMMDD");
		TSecTm MinTm(atoi(MinTmStr.GetSubStr(0,3).CStr()),atoi(MinTmStr.GetSubStr(4,5).CStr()),atoi(MinTmStr.GetSubStr(6,7).CStr()));
		TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0,3).CStr()),atoi(MaxTmStr.GetSubStr(4,5).CStr()),atoi(MaxTmStr.GetSubStr(6,7).CStr()));

		// Construct the quote base from Zarya data
		PQuoteBs QtBs = TQuoteBs::New();

		if (!IsQtBsReady) {
			int HashTableSize = 100; // 100 for each quarter, for one year data, use 400
			if (ToDo == "memeclustzarya") {
				int UrlSetSize = 4 * HashTableSize;
				QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
			}	else if (ToDo == "memeclustqtonly") {
				QtBs->ConstructQtBsQtOnly(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
			} else if (ToDo == "memeclustqttime") {
				QtBs->ConstructQtBsQtTime(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
			} else {
				printf("Please specify one of the three options for -do : memeclustzarya, memeclustqtonly, memeclustqttime!\n");
				return;
			}
		} else {
			TStr InQtBsNm = TStr::Fmt("%s-w%dmfq%d.QtBs", Pref.CStr(), MinWrdLen, MinMemeFq);
			if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
			else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }
		}

		// Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

		// Dump the clusters
		bool SkipUrl = true, FlashDisp = true;
		QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
	}
#pragma endregion memeclust
}