Exemple #1
0
/////////////////////////////////////////////////
// Output-Stream
// Constructs the stream base from the C-string of Str; the maximal line
// length starts at -1 and the current line length at 0.
TSOut::TSOut(const TStr& Str)
  : TSBase(Str.CStr()),
    MxLnLen(-1),
    LnLen(0) {
}
Exemple #2
0
// file extensions
bool THttp::IsHtmlFExt(const TStr& FExt){
  TStr UcFExt=FExt.GetUc();
  return ((UcFExt==TFile::HtmlFExt.GetUc())||(UcFExt==TFile::HtmFExt.GetUc()));
}
Exemple #3
0
/////////////////////////////////////////////////
// Tql-Lexical-Chars
// Stores the character type ChTy in ChTyV for every character of Str
// (the table is indexed by the character's offset from TCh::Mn).
void TTqlChDef::SetChTy(const TTqlLxChTy& ChTy, const TStr& Str){
  const int Chs=Str.Len();
  for (int ChN=0; ChN<Chs; ChN++){
    ChTyV[Str[ChN]-TCh::Mn]=TInt(ChTy);
  }
}
Exemple #4
0
// Returns the part of the argument located by GetPrefixArgN(PrefixStr) that
// starts at offset PrefixStr.Len(). Asserts that such an argument exists.
TStr TEnv::GetArgPostfix(const TStr& PrefixStr) const {
    const int PrefixArgN = GetPrefixArgN(PrefixStr);
    IAssert(PrefixArgN != -1);
    TStr FullArgStr = GetArg(PrefixArgN);
    const int PostfixBChN = PrefixStr.Len();
    return FullArgStr.GetSubStr(PostfixBChN, FullArgStr.Len());
}
Exemple #5
0
// For every character of Str after the first, stores Str[0] in LcChV at the
// character's offset from TCh::Mn. Strings shorter than two characters
// change nothing.
void THttpChDef::SetLcCh(const TStr& Str){
  const int Chs=Str.Len();
  for (int ChN=1; ChN<Chs; ChN++){
    LcChV[Str[ChN]-TCh::Mn]=TCh(Str[0]);
  }
}
Exemple #6
0
// Advances the file search to the next matching file name.
// On success stores the name in FNm and CurFNm, increments CurFNmN and
// returns true. When all paths in FPathV are exhausted, clears CurFNm, sets
// CurFNmN to -1 and returns false.
// A name matches when FExtV is empty or contains its extension, and FBaseWc
// is empty or wildcard-matches its base name; unless CsImpP is set, the
// extension and base name are upper-cased before the comparison.
// Directory entries are searched recursively through SubFFile when RecurseP
// is set; otherwise they go through the same filter as plain files.
// The entries "." and ".." are always skipped.
bool TFFile::Next(TStr& FNm){
  // if need to recurse
  if (!SubFFile.Empty()){
    if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
    else {SubFFile=NULL;}
  }
  // for all required file-paths
  while (FPathN<FPathV.Len()){
    // FPathN==-1 skips FindNextFile and falls through to opening a path
    // (presumably the initial state - confirm against the constructor)
    if ((FPathN!=-1)&&(FindNextFile(FFileDesc->FFileH, &FFileDesc->FDesc))){
      // next file-name available on the current file-path
      TStr FBase=FFileDesc->GetFBase();
      if ((RecurseP)&&(FFileDesc->IsDir())){
        // file-name is directory and recursion is required
        if ((FBase!=".")&&(FBase!="..")){
          // directory is non-trivial - prepare sub-file-find for recursion
          TStr SubFPath=FPathV[FPathN]+FBase;
          TStrV SubFPathV; SubFPathV.Add(SubFPath);
          SubFFile=New(SubFPathV, FExtV, FBaseWc, RecurseP);
          if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
          else {SubFFile=NULL;}
        }
      } else {
        // return file-name if fits
        if ((FBase!=".")&&(FBase!="..")){
          FNm=FPathV[FPathN]+FBase;
          TStr FExt=FNm.GetFExt(); if (!CsImpP){FExt.ToUc(); FBase.ToUc();}
          if (((FExtV.Empty())||(FExtV.SearchForw(FExt)!=-1))&&
           ((FBaseWc.Empty())||(FBase.IsWcMatch(FBaseWc)))){
            CurFNm=FNm; CurFNmN++; return true;}
        }
      }
    } else {
      // close file-find descriptor if needed
      if (FPathN!=-1){
        // NOTE(review): FindClose is called inside IAssert - confirm that
        // IAssert is never compiled out, otherwise the handle would leak
        IAssert(FindClose(FFileDesc->FFileH));
        FFileDesc->FFileH=INVALID_HANDLE_VALUE;
      }
      // find next file existing path from the input list
      // (the loop body is empty: it only advances FPathN until FindFirstFile
      // succeeds on "<path>*.*" or the list ends)
      while ((++FPathN<FPathV.Len())&&
       ((FFileDesc->FFileH=FindFirstFile((FPathV[FPathN]+"*.*").CStr(),
       &FFileDesc->FDesc))==INVALID_HANDLE_VALUE)){}
      // FindFirstFile already delivered the first entry of the new path, so
      // it is handled here the same way as entries from FindNextFile above
      if ((FPathN<FPathV.Len())&&(RecurseP)&&(FFileDesc->IsDir())){
        // file-path found, file-name is directory and recursion is required
        TStr FBase=FFileDesc->GetFBase();
        if ((FBase!=".")&&(FBase!="..")){
          TStr SubFPath=FPathV[FPathN]+FBase;
          TStrV SubFPathV; SubFPathV.Add(SubFPath);
          SubFFile=New(SubFPathV, FExtV, FBaseWc, RecurseP);
          if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
          else {SubFFile=NULL;}
        }
      } else {
        // return file-name if fits
        if (FPathN<FPathV.Len()){
          TStr FBase=FFileDesc->GetFBase();
          if ((FBase!=".")&&(FBase!="..")){
            FNm=FPathV[FPathN]+FBase;
            TStr FExt=FNm.GetFExt(); if (!CsImpP){FExt.ToUc(); FBase.ToUc();}
            if (((FExtV.Empty())||(FExtV.SearchForw(FExt)!=-1))&&
             ((FBaseWc.Empty())||(FBase.IsWcMatch(FBaseWc)))){
              CurFNm=FNm; CurFNmN++; return true;
            }
          }
        }
      }
    }
  }
  // not found
  CurFNm=""; CurFNmN=-1; return false;
}
Exemple #7
0
void TEnv::PutVarVal(const TStr& VarNm, const TStr& VarVal) {
    const int RetVal =
        putenv(TStr::Fmt("%s=%s", VarNm.CStr(), VarVal.CStr())
                   .CStr());
    IAssert(RetVal == 0);
}
Exemple #8
0
// Error callback: prints the message together with the current counts
// returned by GetWaitUrls() and GetConnUrls(), then calls TSysMsg::Quit()
// when Empty() holds. The first (unnamed) argument is ignored.
void TGgWebFetchSaver::OnError(const int&, const TStr& MsgStr){
  printf("Error [Wait:%d Conn.:%d]: %s\n",
   GetWaitUrls(), GetConnUrls(), MsgStr.CStr());
  if (!Empty()){return;}
  TSysMsg::Quit();
}
Exemple #9
0
/////////////////////////////////////////////////
// Google-Focused-Crawl
// Builds a focused crawl around SrcUrlStr and returns it:
//  1. runs a "related:" web search for the source URL and for each of its
//     hits, merging the results into FCrawl->DstRSet;
//  2. downloads the source page (blocking) and the first hits of DstRSet
//     through a TGgWebFetchSaver, then runs TSysMsg::Loop();
//  3. adds all downloaded pages to a bag-of-words base and stores their
//     similarity (bstCos over bwwtNrmTFIDF weights) to the source document in
//     FCrawl->SimDIdKdV, with the total in FCrawl->SumSim.
// MxCands limits the expansion and the number of fetched pages; -1 disables
// the limit. ProxyStr is forwarded to every search and fetch call.
// If the source page cannot be downloaded, FCrawl->SrcWebPg stays NULL and
// the source document is added with an empty body.
PGgFCrawl TGgFCrawl::GetFCrawl(
 const TStr& SrcUrlStr, const int& MxCands, const TStr& ProxyStr){
  // collect related urls
  printf("Expand source URL: %s\n", SrcUrlStr.CStr());
  PRSet SrcUrlRSet=
   TGg::WebSearch(TStr("related:")+SrcUrlStr, -1, TNotify::NullNotify, ProxyStr);
  // create & prepare focused-crawl
  PGgFCrawl FCrawl=TGgFCrawl::New();
  FCrawl->SrcUrlStr=SrcUrlStr;
  FCrawl->DstRSet=TRSet::New(SrcUrlRSet);
  // fill hits
  for (int HitN=0; HitN<SrcUrlRSet->GetHits(); HitN++){
    // stop expanding once more than MxCands candidates are collected
    if ((MxCands!=-1)&&(FCrawl->DstRSet->GetHits()>MxCands)){break;}
    TStr HitUrlStr=SrcUrlRSet->GetHitUrlStr(HitN);
    printf("Expand URL: %s\n", HitUrlStr.CStr());
    PRSet RelUrlRSet=
     TGg::WebSearch(TStr("related:")+HitUrlStr, -1, TNotify::NullNotify, ProxyStr);
    FCrawl->DstRSet->Merge(RelUrlRSet);
  }
  // save related urls
  //TRSet::SaveXml(DstRSet, OutXmlUrlFNm);

  // collect related web-pages
  TGgWebFetchSaver WebFetchSaver(100);
  WebFetchSaver.PutProxyStr(ProxyStr);
  // get source-url web-page
  {bool Ok; TStr MsgStr;
  TWebFetchBlocking::GetWebPg(
   SrcUrlStr, Ok, MsgStr, FCrawl->SrcWebPg, NULL, ProxyStr);
  if (!Ok){FCrawl->SrcWebPg=NULL;}}
  // get related-urls web-page
  int FetchHits=FCrawl->DstRSet->GetHits();
  if ((MxCands!=-1)&&(MxCands<FetchHits)){FetchHits=MxCands;}
  for (int HitN=0; HitN<FetchHits; HitN++){
    TStr HitUrlStr=FCrawl->DstRSet->GetHitUrlStr(HitN);
    WebFetchSaver.FetchUrl(HitUrlStr);
  }
  TSysMsg::Loop();

  // save crawled web-pages
  for (int WebPgN=0; WebPgN<WebFetchSaver.GetWebPgs(); WebPgN++){
    PWebPg WebPg=WebFetchSaver.GetWebPg(WebPgN);
    FCrawl->UrlStrToWebPgH.AddDat(WebPg->GetUrlStr(), WebPg);
  }

  // create bag-of-words
  FCrawl->BowDocBs=TBowDocBs::New();
  // the source page is NULL when its download failed (see above); fall back
  // to an empty body instead of dereferencing the null pointer
  TStr SrcHtmlStr;
  if (FCrawl->SrcWebPg.Empty()){
    printf("Source URL could not be fetched: %s\n", SrcUrlStr.CStr());
  } else {
    SrcHtmlStr=FCrawl->SrcWebPg->GetHttpBodyAsStr();
  }
  FCrawl->SrcDId=FCrawl->BowDocBs->AddHtmlDoc(
   SrcUrlStr, TStrV(), SrcHtmlStr);
  for (int WebPgN=0; WebPgN<WebFetchSaver.GetWebPgs(); WebPgN++){
    PWebPg WebPg=WebFetchSaver.GetWebPg(WebPgN);
    FCrawl->BowDocBs->AddHtmlDoc(
     WebPg->GetUrlStr(0), TStrV(), WebPg->GetHttpBodyAsStr());
  }

  // calculate similarities to the source document
  PBowDocWgtBs BowDocWgtBs=TBowDocWgtBs::New(FCrawl->BowDocBs, bwwtNrmTFIDF);
  PBowSim BowSim=TBowSim::New(bstCos);
  // start from an empty similarity list and a zero sum
  FCrawl->SimDIdKdV.Clr(); FCrawl->SumSim=0;
  for (int DIdN=0; DIdN<BowDocWgtBs->GetDocs(); DIdN++){
    int DId=BowDocWgtBs->GetDId(DIdN);
    if (DId!=FCrawl->SrcDId){
      double Sim=BowSim->GetSim(
       BowDocWgtBs->GetSpV(FCrawl->SrcDId), BowDocWgtBs->GetSpV(DId));
      FCrawl->SimDIdKdV.Add(TFltIntKd(Sim, DId));
      FCrawl->SumSim+=Sim;
    }
  }
  FCrawl->SimDIdKdV.Sort(false);
  // set crawl ok
  FCrawl->Ok=true;
  // return focused-crawl
  return FCrawl;
}
Exemple #10
0
void TGgSchRef::GetAuthNmVPubStr(
 const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){
  // split input string into two parts
  TStr AuthNmVStr; TStr PubStr;
  AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr);
  // author-names string
  AuthNmVStr.SplitOnAllCh(',', AuthNmV, true);
  for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){
    AuthNmV[AuthN].ToTrunc();
  }
  if ((!AuthNmV.Empty())&&
   ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){
    AuthNmV.DelLast();
  }
  // publication-name & publication-year string
  TStr OriginStr; TStr LinkStr;
  PubStr.SplitOnStr(OriginStr, " - ", LinkStr);
  OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr);
  PubNm.ToTrunc(); PubYearStr.ToTrunc();
  if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubYearStr.GetSubStr(0, 3);
  } else
  if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubNm.GetSubStr(0, 3); PubNm="";
  } else {
    PubYearStr="";
  }
}
Exemple #11
0
void TGgSchBs::SetAuthCrawled(const TStr& AuthNm){
  AuthNmToCrawlPH.AddDat(AuthNm.GetLc())=true;
}
Exemple #12
0
// Placeholder: upper-cases the document string and then hits Fail.
// SyllablesIdV is never written.
void TStrParser::DocStrToSylIdV(const TStr& _DocStr, TIntV& SyllablesIdV) {
    TStr UcDocStr = _DocStr.GetUc();  // upper-cased copy, not used any further
    Fail;
}
Exemple #13
0
void TFile::Rename(const TStr& SrcFNm, const TStr& DstFNm){
  EAssertR(
   rename(SrcFNm.CStr(), DstFNm.CStr())==0,
   "Error renaming file '"+SrcFNm+"' to "+DstFNm+"'.");
}
Exemple #14
0
// Writes Str through PutBf as Len()+1 bytes taken from its C-string, i.e.
// the characters plus one trailing byte.
void TFRnd::PutStr(const TStr& Str){
  const int BfL=Str.Len()+1;
  PutBf(Str.CStr(), BfL);
}
Exemple #15
0
// Calls CreateDirectory on FPathFNm (second argument NULL) and returns true
// when the call yields a non-zero value.
bool TDir::GenDir(const TStr& FPathFNm){
  const bool CreatedP=(CreateDirectory(FPathFNm.CStr(), NULL)!=0);
  return CreatedP;
}
Exemple #16
0
/////////////////////////////////////////////////
// Time-Profiler - poor-man's profiler
// Adds the key TimerNm to TimerH and returns the result of AddKey; MxNmLen
// is raised to the name's length when that is larger.
int TTmProfiler::AddTimer(const TStr& TimerNm) {
	const int TimerNmLen = TimerNm.Len();
	MxNmLen = TInt::GetMx(MxNmLen, TimerNmLen);
	return TimerH.AddKey(TimerNm);
}
Exemple #17
0
// Calls RemoveDirectory on FPathFNm and returns true when the call yields a
// non-zero value.
bool TDir::DelDir(const TStr& FPathFNm){
  const bool RemovedP=(RemoveDirectory(FPathFNm.CStr())!=0);
  return RemovedP;
}
Exemple #18
0
// Fits a Kronecker initiator matrix to graph GD by gradient descent and
// stores it in FitMtx.
// args   - command-line style option string (parsed through Env, see the
//          "-o:", "-n0:", "-m:", ... options below)
// GD     - graph to fit
// FitMtx - receives KronLL.GetProbMtx() after the descent
// A report (parameters, log-likelihood, run time, estimated initiator) is
// written to the file named by "-o:".
// Returns 0, or -1 when the report file cannot be opened (nothing is fitted
// in that case and FitMtx is left unchanged).
int InitKronecker(const TStr args, PNGraph &GD, TKronMtx& FitMtx){
	Env = TEnv(args, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("Kronecker graphs. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	TExeTm ExeTm;
	Try
	Env = TEnv(args, TNotify::StdNotify);
	//const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input graph file (single directed edge per line)");
	TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "initMatrix.txt", "Output file prefix");
	const TInt NZero = Env.GetIfArgPrefixInt("-n0:", 2, "Innitiator matrix size");
	const TStr InitMtx = Env.GetIfArgPrefixStr("-m:", "0.9 0.7; 0.5 0.2", "Init Gradient Descent Matrix (R=random)").GetLc();
	const TStr Perm = Env.GetIfArgPrefixStr("-p:", "d", "Initial node permutation: d:Degree, r:Random, o:Order").GetLc();
	const TInt GradIter = Env.GetIfArgPrefixInt("-gi:", 10, "Gradient descent iterations");
	const TFlt LrnRate = Env.GetIfArgPrefixFlt("-l:", 1e-5, "Learning rate");
	const TFlt MnStep = Env.GetIfArgPrefixFlt("-mns:", 0.005, "Minimum gradient step");
	const TFlt MxStep = Env.GetIfArgPrefixFlt("-mxs:", 0.05, "Maximum gradient step");
	const TInt WarmUp =  Env.GetIfArgPrefixInt("-w:", 10000, "Samples to warm up");
	const TInt NSamples = Env.GetIfArgPrefixInt("-s:", 100000, "Samples per gradient estimation");
	//const TInt GradType = Env.GetIfArgPrefixInt("-gt:", 1, "1:Grad1, 2:Grad2");
	const bool ScaleInitMtx = Env.GetIfArgPrefixBool("-sim:", true, "Scale the initiator to match the number of edges");
	const TFlt PermSwapNodeProb = Env.GetIfArgPrefixFlt("-nsp:", 1.0, "Probability of using NodeSwap (vs. EdgeSwap) MCMC proposal distribution");
	//if (OutFNm.Empty()) { OutFNm = TStr::Fmt("%s-fit%d", InFNm.GetFMid().CStr(), NZero()); }
	printf("%s\n", OutFNm.CStr());
	// load graph
	cout << "n0 = " << NZero << endl;
	// fit
	FILE *F = fopen(OutFNm.CStr(), "w");
	// fopen yields NULL on failure; every fprintf below would then be invalid
	if (F == NULL) {
		printf("Can not open output file %s\n", OutFNm.CStr());
		return -1;
	}
	TKronMtx InitKronMtx = InitMtx=="r" ? TKronMtx::GetRndMtx(NZero, 0.1) : TKronMtx::GetMtx(InitMtx);
	InitKronMtx.Dump("INIT PARAM", true);
	TKroneckerLL KronLL(GD, InitKronMtx, PermSwapNodeProb);
	fprintf(F, "INIT PARAM\t%s, MTX SUM %f\n", InitKronMtx.GetMtxStr().CStr(), InitKronMtx.GetMtxSum());
	if (ScaleInitMtx) {
		InitKronMtx.SetForEdges(GD->GetNodes(), GD->GetEdges()); }
	KronLL.InitLL(GD, InitKronMtx);
	InitKronMtx.Dump("SCALED PARAM", true);
	fprintf(F, "SCALED PARAM\t%s, MTX SUM %f\n", InitKronMtx.GetMtxStr().CStr(), InitKronMtx.GetMtxSum());
	KronLL.SetPerm(Perm.GetCh(0));
	double LogLike = 0;
	//if (GradType == 1) {
	LogLike = KronLL.GradDescent(GradIter, LrnRate, MnStep, MxStep, WarmUp, NSamples);
	//} else if (GradType == 2) {
	//  LogLike = KronLL.GradDescent2(GradIter, LrnRate, MnStep, MxStep, WarmUp, NSamples); }
	//else{ Fail; }
	//const TKronMtx& FitMtx = KronLL.GetProbMtx();
	FitMtx = KronLL.GetProbMtx();

//	fprintf(F, "Input\t%s\n", InFNm.CStr());
	TStrV ParamV; Env.GetCmLn().SplitOnAllCh(' ', ParamV);
	fprintf(F, "Command line options\n");
	for (int i = 0; i < ParamV.Len(); i++) {
		// print each option without its leading '-'
		fprintf(F, "\t%s\n", ParamV[i].CStr()+(ParamV[i][0]=='-'?1:0)); }
	fprintf(F, "Loglikelihood\t%10.2f\n", LogLike);
	fprintf(F, "Absolute error (based on expected number of edges)\t%f\n", KronLL.GetAbsErr());
	fprintf(F, "RunTime\t%g\n", ExeTm.GetSecs());
	fprintf(F, "Estimated initiator\t%s, mtx sum %f\n", FitMtx.GetMtxStr().CStr(), FitMtx.GetMtxSum());
	/*if (ScaleInitMtx) {
		FitMtx.SetForEdgesNoCut(GD->GetNodes(), GD->GetEdges()); }
	fprintf(F, "Scaled initiator\t%s, mtx sum %f\n", FitMtx.GetMtxStr().CStr(), FitMtx.GetMtxSum());
	FitMtx.Normalize();
	fprintf(F, "Normalized initiator\t%s, mtx sum %f\n", FitMtx.GetMtxStr().CStr(), FitMtx.GetMtxSum());*/
	fclose(F);

	Catch
		printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Exemple #19
0
// Consistency check of the network.
// Verifies for every node that its out- and in-edge id vectors are sorted,
// that every listed edge id exists and that no id appears twice in a row;
// verifies for every edge that its source and destination nodes exist.
// Each violation is raised through EAssertR when ThrowExcept is true, and
// reported through ErrNotify otherwise.
// Returns true when no violation was found.
bool TNEANetMP::IsOk(const bool& ThrowExcept) const {
    bool RetVal = true;
    for (int N = NodeH.FFirstKeyId(); NodeH.FNextKeyId(N); ) {
        const TNode& Node = NodeH[N];
        if (! Node.OutEIdV.IsSorted()) {
            const TStr Msg = TStr::Fmt("Out-edge list of node %d is not sorted.", Node.GetId());
            if (ThrowExcept) {
                EAssertR(false, Msg);
            }
            else {
                ErrNotify(Msg.CStr());
            }
            RetVal=false;
        }
        if (! Node.InEIdV.IsSorted()) {
            const TStr Msg = TStr::Fmt("In-edge list of node %d is not sorted.", Node.GetId());
            if (ThrowExcept) {
                EAssertR(false, Msg);
            }
            else {
                ErrNotify(Msg.CStr());
            }
            RetVal=false;
        }
        // check out-edge ids
        int prevEId = -1;
        for (int e = 0; e < Node.GetOutDeg(); e++) {
            if (! IsEdge(Node.GetOutEId(e))) {
                const TStr Msg = TStr::Fmt("Out-edge id %d of node %d does not exist.",  Node.GetOutEId(e), Node.GetId());
                if (ThrowExcept) {
                    EAssertR(false, Msg);
                }
                else {
                    ErrNotify(Msg.CStr());
                }
                RetVal=false;
            }
            // only adjacent entries are compared for duplicates
            if (e > 0 && prevEId == Node.GetOutEId(e)) {
                const TStr Msg = TStr::Fmt("Node %d has duplicate out-edge id %d.", Node.GetId(), Node.GetOutEId(e));
                if (ThrowExcept) {
                    EAssertR(false, Msg);
                }
                else {
                    ErrNotify(Msg.CStr());
                }
                RetVal=false;
            }
            prevEId = Node.GetOutEId(e);
        }
        // check in-edge ids
        prevEId = -1;
        for (int e = 0; e < Node.GetInDeg(); e++) {
            if (! IsEdge(Node.GetInEId(e))) {
                const TStr Msg = TStr::Fmt("In-edge id %d of node %d does not exist.",  Node.GetInEId(e), Node.GetId());
                if (ThrowExcept) {
                    EAssertR(false, Msg);
                }
                else {
                    ErrNotify(Msg.CStr());
                }
                RetVal=false;
            }
            // only adjacent entries are compared for duplicates
            if (e > 0 && prevEId == Node.GetInEId(e)) {
                const TStr Msg = TStr::Fmt("Node %d has duplicate in-edge id %d.", Node.GetId(), Node.GetInEId(e));
                if (ThrowExcept) {
                    EAssertR(false, Msg);
                }
                else {
                    ErrNotify(Msg.CStr());
                }
                RetVal=false;
            }
            prevEId = Node.GetInEId(e);
        }
    }
    // check that both end points of every edge exist
    for (int E = EdgeH.FFirstKeyId(); EdgeH.FNextKeyId(E); ) {
        const TEdge& Edge = EdgeH[E];
        if (! IsNode(Edge.GetSrcNId())) {
            const TStr Msg = TStr::Fmt("Edge %d source node %d does not exist.", Edge.GetId(), Edge.GetSrcNId());
            if (ThrowExcept) {
                EAssertR(false, Msg);
            }
            else {
                ErrNotify(Msg.CStr());
            }
            RetVal=false;
        }
        if (! IsNode(Edge.GetDstNId())) {
            const TStr Msg = TStr::Fmt("Edge %d destination node %d does not exist.", Edge.GetId(), Edge.GetDstNId());
            if (ThrowExcept) {
                EAssertR(false, Msg);
            }
            else {
                ErrNotify(Msg.CStr());
            }
            RetVal=false;
        }
    }
    return RetVal;
}
Exemple #20
0
// Writes the data file (DataFNm) and the gnuplot script (PlotFNm) for the
// series currently stored in SeriesV.
// Comment is written, together with the current time, into the header
// comment of both files.
// Side effects: assigns DataFNm, XCol, YCol and ZCol on the non-empty series,
// and may replace Tics42 with the result of GetTics42().
// Failure to open either file is reported through EAssertR.
void TGnuPlot::CreatePlotFile(const TStr& Comment) {
  time_t ltime;  time(&ltime);
  // ctime() text ends with '\n'; overwrite that last character with 0
  char* TimeStr = ctime(&ltime);  TimeStr[strlen(TimeStr) - 1] = 0;
  // rearrange columns so that longest are on the left
  //SeriesV.Sort(false);
  // SerIdV holds series indices ordered by TGpSeriesCmp; SeriesV itself is
  // left in its original order
  TIntV SerIdV(SeriesV.Len(), 0);
  for (int i = 0; i < SeriesV.Len(); i++) { SerIdV.Add(i); }
  SerIdV.SortCmp(TGpSeriesCmp(SeriesV));
  // set columns
  int ColCnt = 1;
  bool SaveData = false;
  for (int s = 0; s < SeriesV.Len(); s++) {
    TGpSeries& Plt = SeriesV[SerIdV[s]];
    if (Plt.XYValV.Empty()) { continue; }
    Plt.DataFNm = DataFNm;
    // plots use same X column
    const int PrevCol = s > 0 ? IsSameXCol(SerIdV[s], SerIdV[s-1]) : -1;
    if (PrevCol != -1) { Plt.XCol = PrevCol; }
    else { Plt.XCol = ColCnt;  ColCnt++; }
    Plt.YCol = ColCnt;  ColCnt++;
    if (! Plt.ZValV.Empty()) { Plt.ZCol = ColCnt;  ColCnt++; }
    if (! Plt.XYValV.Empty()) { SaveData=true; }
  }
  // save data file (skip duplicate X columns)
  if (SaveData) {
    FILE *F = fopen(DataFNm.CStr(), "wt");
    EAssertR(F != NULL, TStr("Can not open data file ")+DataFNm);
    fprintf(F, "#\n");
    fprintf(F, "# %s (%s)\n", Comment.CStr(), TimeStr);
    fprintf(F, "#\n");
    // column names
    for (int i = 0; i < SerIdV.Len(); i++) {
      const TGpSeries& Ser = SeriesV[SerIdV[i]];
      if (Ser.XYValV.Empty()) { continue; }
      if (i == 0) { fprintf(F, "# "); } else { fprintf(F, "\t"); }
      if (Ser.SaveXVals()) {
        if (! LblX.Empty()) { fprintf(F, "%s\t", LblX.CStr()); }
        else { fprintf(F, "XVals\t"); }
      }
      if (Ser.Label.Empty()) { fprintf(F, "%s", LblY.CStr()); }
      else { fprintf(F, "%s", SeriesV[SerIdV[i]].Label.CStr()); }
      if (Ser.ZCol > 0) fprintf(F, "\tDeltaY");
    }
    fprintf(F, "\n");
    // data
    // the row count is taken from the first series in sorted order
    // (presumably the longest one, per the sorting comment above - confirm
    // against TGpSeriesCmp)
    for (int row = 0; row < SeriesV[SerIdV[0]].XYValV.Len(); row++) {
      for (int i = 0; i < SeriesV.Len(); i++) {
        const TGpSeries& Ser = SeriesV[SerIdV[i]];
        // series shorter than the current row contribute nothing to the line
        if (row < Ser.XYValV.Len()) {
          if (i > 0) { fprintf(F, "\t"); }
          if (Ser.SaveXVals()) { fprintf(F, "%g\t%g", Ser.XYValV[row].Key(), Ser.XYValV[row].Dat()); }
          else { fprintf(F, "%g", Ser.XYValV[row].Dat()); }
          if (! Ser.ZValV.Empty()) { fprintf(F, "\t%g", Ser.ZValV[row]()); }
        }
      }
      fprintf(F, "\n");
    }
    fclose(F);
  }
  // save plot file
  FILE *F = fopen(PlotFNm.CStr(), "wt");
  EAssertR(F != 0, TStr("Can not open plot file ")+PlotFNm);
  // NOTE(review): CurDir is computed and escaped but not used below
  TStr CurDir = TDir::GetCurDir();
  CurDir.ChangeStrAll("\\", "\\\\");
  fprintf(F, "#\n");
  fprintf(F, "# %s (%s)\n", Comment.CStr(), TimeStr);
  fprintf(F, "#\n\n");
  if (! Title.Empty()) fprintf(F, "set title \"%s\"\n", Title.CStr());
  fprintf(F, "set key bottom right\n");
  fprintf(F, "%s\n", GetScaleStr(ScaleTy).CStr());
  // power-of-ten tic labels for the log10-scaled axes
  if (ScaleTy==gpsLog || ScaleTy==gpsLog10X || ScaleTy==gpsLog10XY) {
    fprintf(F, "set format x \"10^{%%L}\"\n");
    fprintf(F, "set mxtics 10\n"); }
  if (ScaleTy==gpsLog || ScaleTy==gpsLog10Y || ScaleTy==gpsLog10XY) {
    fprintf(F, "set format y \"10^{%%L}\"\n");
    fprintf(F, "set mytics 10\n"); }
  // power-of-two tic labels for the log2-scaled axes
  if (ScaleTy==gpsLog2X || ScaleTy==gpsLog2XY) { fprintf(F, "set format x \"2^{%%L}\"\n"); }
  if (ScaleTy==gpsLog2Y || ScaleTy==gpsLog2XY) { fprintf(F, "set format y \"2^{%%L}\"\n"); }
  if (SetGrid) fprintf(F, "set grid\n");
  // a range is only emitted when its two bounds differ
  if (XRange.Val1 != XRange.Val2) fprintf(F, "set xrange [%g:%g]\n", XRange.Val1(), XRange.Val2());
  if (YRange.Val1 != YRange.Val2) fprintf(F, "set yrange [%g:%g]\n", YRange.Val1(), YRange.Val2());
  if (! LblX.Empty()) fprintf(F, "set xlabel \"%s\"\n", LblX.CStr());
  if (! LblY.Empty()) fprintf(F, "set ylabel \"%s\"\n", LblY.CStr());
  // values below -1 trigger a GetTics42() call (presumably "not yet
  // determined" - confirm where Tics42 is initialised)
  if (Tics42 < -1) {
    Tics42 = GetTics42();
  }
  if (Tics42) {
    fprintf(F, "set tics scale 2\n"); // New in version 4.2
  } else {
    fprintf(F, "set ticscale 2 1\n"); // Old (deprecated)
  }
  // custom commands
  for (int i = 0; i < MoreCmds.Len(); i++) {
    fprintf(F, "%s\n", MoreCmds[i].CStr()); }
  // plot
  if (! SeriesV.Empty()) {
    fprintf(F, "plot \t");
    // series are emitted in their original SeriesV order here
    for (int i = 0; i < SeriesV.Len(); i++) {
      fprintf(F, "%s", GetSeriesPlotStr(i).CStr()); }
    fprintf(F, "\n");
  }
  if (SetPause) fprintf(F, "pause -1 \"Hit return to exit. %s\"\n", PlotFNm.CStr());
  fclose(F);
}
Exemple #21
0
// Returns the value of the environment variable VarNm, or an empty string
// when the variable is not set (getenv() returns NULL in that case, which is
// handled here explicitly instead of being passed on to the TStr
// constructor).
TStr TEnv::GetVarVal(const TStr& VarNm) const {
    const char* VarValCStr = getenv(VarNm.CStr());
    if (VarValCStr == NULL) { return TStr(); }
    return TStr(VarValCStr);
}
Exemple #22
0
// Creates a plot whose data file is FileNm+".tab" and whose plot file is
// FileNm+".plt". Labels start empty, the scale is gpsAuto, both ranges are
// (0, 0) and SetPause is on. Asserts that FileNm is not empty.
TGnuPlot::TGnuPlot(const TStr& FileNm, const TStr& PlotTitle, const bool& Grid)
 : DataFNm(FileNm+".tab"),
   PlotFNm(FileNm+".plt"),
   Title(PlotTitle),
   LblX(),
   LblY(),
   ScaleTy(gpsAuto),
   YRange(0, 0),
   XRange(0, 0),
   SetGrid(Grid),
   SetPause(true),
   SeriesV(),
   MoreCmds() {
  IAssert(! FileNm.Empty());
}
Exemple #23
0
// Applies the single-character SetChTy overload to every character of Str.
void THttpChDef::SetChTy(const THttpChTy& ChTy, const TStr& Str){
  const int Chs=Str.Len();
  for (int ChN=0; ChN<Chs; ChN++){
    SetChTy(ChTy, Str[ChN]);
  }
}
Exemple #24
0
// Creates a plot with explicit data and plot file names; an empty name is
// replaced by DefDataFNm / DefPlotFNm respectively. Labels start empty, the
// scale is gpsAuto, both ranges are (0, 0) and SetPause is on.
TGnuPlot::TGnuPlot(const TStr& DataFileNm, const TStr& PlotFileNm, const TStr& PlotTitle, const bool& Grid)
  : DataFNm(DataFileNm.Empty() ? DefDataFNm : DataFileNm),
    PlotFNm(PlotFileNm.Empty() ? DefPlotFNm : PlotFileNm),
    Title(PlotTitle),
    LblX(),
    LblY(),
    ScaleTy(gpsAuto),
    YRange(0, 0),
    XRange(0, 0),
    SetGrid(Grid),
    SetPause(true),
    SeriesV(),
    MoreCmds() {
}
Exemple #25
0
// Returns the string obtained by passing every character of Str through
// GetLcCh, in order.
TStr THttpChDef::GetLcStr(const TStr& Str){
  TChA LcChA;
  const int Chs=Str.Len();
  for (int ChN=0; ChN<Chs; ChN++){
    LcChA+=GetLcCh(Str[ChN]);
  }
  return LcChA;
}
// Loads a window of quote/document data, clusters the quotes and dumps every
// cluster with its quotes to a tab-separated text file.
// Recognised arguments (with defaults): "directory", "start", "qbdbc",
// "qbdb", "window", "edge" and "last".
// Returns 0 on success and 1 when the output file cannot be opened.
int main(int argc, char *argv[]) {
  // #### SETUP: Parse Arguments
  LogOutput Log;
  THash<TStr, TStr> Arguments;
  ArgumentParser::ParseArguments(argc, argv, Arguments, Log);

  TStr OutputDirectory = ArgumentParser::GetArgument(Arguments, "directory", "");
  TStr StartString = ArgumentParser::GetArgument(Arguments, "start", "2009-02-01");
  TStr QBDBCDirectory = ArgumentParser::GetArgument(Arguments, "qbdbc", "/lfs/1/tmp/curis/QBDBC-C/");
  TStr QBDBDirectory = ArgumentParser::GetArgument(Arguments, "qbdb", "/lfs/1/tmp/curis/QBDB/");
  TInt WindowSize = ArgumentParser::GetArgument(Arguments, "window", "14").GetInt();
  TStr EdgeString = ArgumentParser::GetArgument(Arguments, "edge", "lsh");
  QuoteGraph::SetEdgeCreation(EdgeString);

  Log.DisableLogging();

  TQuoteBase QB;
  TDocBase DB;
  TClusterBase CB;

  // Start memeseed after a break in the middle
  TStr LastDate = ArgumentParser::GetArgument(Arguments, "last", "");
  if (LastDate != "") {
    {
      // the old bases live only inside this scope; just their counters are
      // carried over into the fresh bases
      TQuoteBase OldQB;
      TDocBase OldDB;
      TClusterBase OldCB;
      PNGraph OldQGraph;
      TDataLoader::LoadCumulative(QBDBCDirectory, LastDate, OldQB, OldDB, OldCB, OldQGraph);
      QB = TQuoteBase(OldQB.GetCounter());
      DB = TDocBase(OldDB.GetCounter());
      CB = TClusterBase(OldCB.GetCounter());
    }
    Err("Counters updated!\n");
  }

  TStr LogDirectory;
  Log.GetDirectory(LogDirectory);
  Err("Output directory: %s\n", LogDirectory.CStr());

  // #### DATA LOADING: Load ALL the things!
  fprintf(stderr, "Loading QB and DB from file for %d days, starting from %s...\n", WindowSize.Val, StartString.CStr());
  Err("%s\n", QBDBDirectory.CStr());
  TSecTm PresentTime = TDataLoader::LoadQBDBByWindow(QBDBDirectory, StartString, WindowSize, QB, DB);
  fprintf(stderr, "\tQBDB successfully loaded!\n");

  // #### CLUSTERING STEP
  fprintf(stderr, "Creating clusters\n");
  QuoteGraph GraphCreator(&QB, &CB);
  PNGraph QGraph;
  GraphCreator.CreateGraph(QGraph);
  Clustering ClusterJob(QGraph);
  ClusterJob.BuildClusters(&CB, &QB, &DB, Log, PresentTime, false);

  TIntV AllClusters;
  CB.GetAllClusterIdsSortByFreq(AllClusters);
  //PostCluster::FilterAndCacheClusterSize(&DB, &QB, &CB, Log, AllClusters, PresentTime);
  //PostCluster::FilterAndCacheClusterPeaks(&DB, &QB, &CB, Log, AllClusters, PresentTime);

  // #### OUTPUT: one header line per cluster, then one line per quote
  int NumClusters = AllClusters.Len();
  FILE* F = fopen("nifty-day-2012-01-01.txt", "w");
  // fopen yields NULL on failure; writing through it would be invalid
  if (F == NULL) {
    Err("Could not open output file nifty-day-2012-01-01.txt for writing\n");
    return 1;
  }
  for (int i = 0; i < NumClusters; i++) {
    TCluster C;
    CB.GetCluster(AllClusters[i], C);
    TIntV Quotes;

    C.GetQuoteIds(Quotes);
    TStr RepStr;
    C.GetRepresentativeQuoteString(RepStr, &QB);
    fprintf(F, "%d\t%d\t%s\t%d\n", Quotes.Len(), C.GetNumQuotes().Val, RepStr.CStr(), C.GetId().Val);
    for (int j = 0; j < Quotes.Len(); j++) {
      TQuote Q;
      QB.GetQuote(Quotes[j], Q);
      TStr Str;
      Q.GetContentString(Str);
      // NOTE(review): the number of sources is printed in both leading columns
      fprintf(F, "\t%d\t%d\t%s\t%d\n", Q.GetNumSources().Val, Q.GetNumSources().Val, Str.CStr(), Q.GetId().Val);
    }
    fprintf(F, "\n");

  }
  fclose(F);

  TStr Directory;
  Log.GetDirectory(Directory);
  Err("Done with memeoutput! Directory created at: %s\n", Directory.CStr());
  //printf("%d\n", TStringUtil::f_counter);
  return 0;
}
Exemple #27
0
bool THttp::IsGifFExt(const TStr& FExt){
  return (FExt.GetUc()==TFile::GifFExt.GetUc());
}
/// MCMC fitting
void TAGMFit::RunMCMC(const int& MaxIter, const int& EvalLambdaIter, const TStr& PlotFPrx) {
  TExeTm IterTm, TotalTm;
  double PrevL = Likelihood(), DeltaL = 0;
  double BestL = PrevL;
  printf("initial likelihood = %f\n",PrevL);
  TIntFltPrV IterTrueLV, IterJoinV, IterLeaveV, IterAcceptV, IterSwitchV, IterLBV;
  TIntPrV IterTotMemV;
  TIntV IterV;
  TFltV BestLV;
  TVec<TIntSet> BestCmtySetV;
  int SwitchCnt = 0, LeaveCnt = 0, JoinCnt = 0, AcceptCnt = 0, ProbBinSz;
  int Nodes = G->GetNodes(), Edges = G->GetEdges();
  TExeTm PlotTm;
  ProbBinSz = TMath::Mx(1000, G->GetNodes() / 10); //bin to compute probabilities
  IterLBV.Add(TIntFltPr(1, BestL));

  for (int iter = 0; iter < MaxIter; iter++) {
    IterTm.Tick();
    int NID = -1;
    int JoinCID = -1, LeaveCID = -1;
    SampleTransition(NID, JoinCID, LeaveCID, DeltaL); //sample a move
    double OptL = PrevL;
    if (DeltaL > 0 || Rnd.GetUniDev() < exp(DeltaL)) { //if it is accepted
      IterTm.Tick();
      if (LeaveCID > -1 && LeaveCID != BaseCID) { LeaveCom(NID, LeaveCID); }
      if (JoinCID > -1 && JoinCID != BaseCID) { JoinCom(NID, JoinCID); }
      if (LeaveCID > -1 && JoinCID > -1 && JoinCID != BaseCID && LeaveCID != BaseCID) { SwitchCnt++; }
      else if (LeaveCID > -1  && LeaveCID != BaseCID) { LeaveCnt++;}
      else if (JoinCID > -1 && JoinCID != BaseCID) { JoinCnt++;}
      AcceptCnt++;
      if ((iter + 1) % EvalLambdaIter == 0) {
        IterTm.Tick();
        MLEGradAscentGivenCAG(0.01, 3);
        OptL = Likelihood();
      }
      else{
        OptL = PrevL + DeltaL;
      }
      if (BestL <= OptL && CIDNSetV.Len() > 0) {
        BestCmtySetV = CIDNSetV;
        BestLV = LambdaV;
        BestL = OptL;
      }
    }
    if (iter > 0 && (iter % ProbBinSz == 0) && PlotFPrx.Len() > 0) { 
      IterLBV.Add(TIntFltPr(iter, OptL));
      IterSwitchV.Add(TIntFltPr(iter, (double) SwitchCnt / (double) AcceptCnt));
      IterLeaveV.Add(TIntFltPr(iter, (double) LeaveCnt / (double) AcceptCnt));
      IterJoinV.Add(TIntFltPr(iter, (double) JoinCnt / (double) AcceptCnt));
      IterAcceptV.Add(TIntFltPr(iter, (double) AcceptCnt / (double) ProbBinSz));
      SwitchCnt = JoinCnt = LeaveCnt = AcceptCnt = 0;
    }
    PrevL = OptL;
    if ((iter + 1) % 10000 == 0) {
      printf("\r%d iterations completed [%.2f]", iter, (double) iter / (double) MaxIter);
    }
  }
  
  // plot the likelihood and acceptance probabilities if the plot file name is given
  if (PlotFPrx.Len() > 0) {
    TGnuPlot GP1;
    GP1.AddPlot(IterLBV, gpwLinesPoints, "likelihood");
    GP1.SetDataPlotFNm(PlotFPrx + ".likelihood.tab", PlotFPrx + ".likelihood.plt");
    TStr TitleStr = TStr::Fmt(" N:%d E:%d", Nodes, Edges);
    GP1.SetTitle(PlotFPrx + ".likelihood" + TitleStr);
    GP1.SavePng(PlotFPrx + ".likelihood.png");

    TGnuPlot GP2;
    GP2.AddPlot(IterSwitchV, gpwLinesPoints, "Switch");
    GP2.AddPlot(IterLeaveV, gpwLinesPoints, "Leave");
    GP2.AddPlot(IterJoinV, gpwLinesPoints, "Join");
    GP2.AddPlot(IterAcceptV, gpwLinesPoints, "Accept");
    GP2.SetTitle(PlotFPrx + ".transition");
    GP2.SetDataPlotFNm(PlotFPrx + "transition_prob.tab", PlotFPrx + "transition_prob.plt");
    GP2.SavePng(PlotFPrx + "transition_prob.png");
  }
  CIDNSetV = BestCmtySetV;
  LambdaV = BestLV;

  InitNodeData();
  MLEGradAscentGivenCAG(0.001, 100);
  printf("\nMCMC completed (best likelihood: %.2f) [%s]\n", BestL, TotalTm.GetTmStr());
}
Exemple #29
0
// Standardize first and last names into <last_name>_<first name initial>.
// The name is lower-cased; newlines and dots become spaces; everything from
// the first '#' or digit on, and everything from the first '(' on, is cut
// off; all characters except letters, whitespace and '-' are removed; a
// trailing "jr" token is dropped.
// Returns the null string when the input does not look like a person name:
// empty after cleaning, contains ')', contains one of the words "figures",
// "macros", "univ", "institute", has fewer than two tokens, has a last token
// that is one character long or does not start with a letter, or has a first
// token that does not start with a letter.
TStr TStrUtil::GetStdName(TStr AuthorName) {
  TStr StdName;
  AuthorName.ToLc();
  AuthorName.ChangeChAll('\n', ' ');
  AuthorName.ChangeChAll('.', ' ');
  // if there is a number in the name, remove it and everything after it
  int pos = 0;
  while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) {
    pos++; }
  if (pos < AuthorName.Len()) {
    AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc(); }
  if (AuthorName.Empty()) { return TStr::GetNullStr(); }

  // replace everything after '('
  int b = AuthorName.SearchCh('(');
  if (b != -1) {
    AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc(); }
  // skip if contains ')'
  if (AuthorName .SearchCh(')')!=-1) { return TStr::GetNullStr(); }
  // skip if it is not a name
  if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1
   || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) {
    return TStr::GetNullStr();
  }
  // remove all non-letters (latex tags, ...)
  TChA NewName;
  for (int i = 0; i < AuthorName.Len(); i++) {
    const char Ch = AuthorName[i];
    if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') { NewName += Ch; }
  }
  StdName = NewName;  StdName.ToTrunc();
  TStrV AuthNmV; StdName.SplitOnWs(AuthNmV);
  // drop a trailing "jr" suffix
  if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast();
  // too short -- not a name
  if (AuthNmV.Len() < 2) return TStr::GetNullStr();

  const TStr LastNm = AuthNmV.Last();
  if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr();

  // '-' survives the filter above, so the first token may start with it;
  // such input is rejected like any other non-name instead of asserting
  if (! TCh::IsAlpha(AuthNmV[0][0])) return TStr::GetNullStr();
  return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]);
}
Exemple #30
0
// Creates a memory input stream over a private copy of the characters of
// Str. The buffer holds exactly Str.Len() characters (no extra byte for a
// terminator) and reading starts at position 0.
TMIn::TMIn(const TStr& Str):
  TSBase("Input-Memory"), TSIn("Input-Memory"), Bf(NULL), BfC(0), BfL(0){
  BfL=Str.Len();
  Bf=new char[BfL];
  strncpy(Bf, Str.CStr(), BfL);
}