Beispiel #1
0
// Checks the counting and searching members of TStr against the fixed
// sample "abcdaaba" (a0 b1 c2 d3 a4 a5 b6 a7).
TEST(TStr, Search) {
	TStr Str = "abcdaaba";
	const int LastChN = Str.Len() - 1;

	// number of occurrences of a character in the whole string
	EXPECT_EQ(Str.CountCh('a'), 4);
	EXPECT_EQ(Str.CountCh('b'), 2);
	EXPECT_EQ(Str.CountCh('e'), 0);

	// character membership
	EXPECT_TRUE(Str.IsChIn('a'));
	EXPECT_TRUE(Str.IsChIn('b'));
	EXPECT_FALSE(Str.IsChIn('e'));

	// substring membership, including the string itself and the empty string
	EXPECT_TRUE(Str.IsStrIn(Str));
	EXPECT_TRUE(Str.IsStrIn(""));
	EXPECT_TRUE(Str.IsStrIn("bcd"));
	EXPECT_TRUE(Str.IsStrIn("ab"));
	EXPECT_FALSE(Str.IsStrIn("eba"));

	// counting with a second (start) argument; 10 is past the end and must throw
	EXPECT_EQ(Str.CountCh('a', 1), 3);
	EXPECT_ANY_THROW(Str.CountCh('a', 10));
	EXPECT_EQ(Str.CountCh('b', 2), 1);
	EXPECT_EQ(Str.CountCh('e', 1), 0);

	// forward character search; -1 is expected for a missing character
	EXPECT_EQ(Str.SearchCh('a'), 0);
	EXPECT_EQ(Str.SearchCh('b'), 1);
	EXPECT_EQ(Str.SearchCh('e'), -1);

	// forward character search with a start argument
	EXPECT_EQ(Str.SearchCh('a', 1), 4);
	EXPECT_EQ(Str.SearchCh('b', 2), 6);
	EXPECT_EQ(Str.SearchCh('e', 1), -1);

	// backward character search over the whole string
	EXPECT_EQ(Str.SearchChBack('a'), LastChN);
	EXPECT_EQ(Str.SearchChBack('b'), LastChN - 1);
	EXPECT_EQ(Str.SearchChBack('e'), -1);

	// backward character search with a start argument
	EXPECT_EQ(Str.SearchChBack('a', LastChN - 1), LastChN - 2);
	EXPECT_EQ(Str.SearchChBack('b', LastChN - 2), 1);
	EXPECT_EQ(Str.SearchChBack('e', 3), -1);

	// forward substring search; the empty string is expected at position 0
	EXPECT_EQ(Str.SearchStr("a"), 0);
	EXPECT_EQ(Str.SearchStr("b"), 1);
	EXPECT_EQ(Str.SearchStr("e"), -1);
	EXPECT_EQ(Str.SearchStr(""), 0);

	// forward substring search with a start argument
	EXPECT_EQ(Str.SearchStr("a", 1), 4);
	EXPECT_EQ(Str.SearchStr("b", 2), 6);
	EXPECT_EQ(Str.SearchStr("e", 1), -1);
}
Beispiel #2
0
/// Command-line driver for the "ragm" run built on TAGMFast.
///
/// Loads an undirected graph (via TUNGraph::Load when the input name contains
/// ".ungraph", otherwise via TAGMUtil::LoadEdgeListStr), optionally adds node
/// labels from a tab-separated file, picks the number of communities (by
/// FindComsByCV when "-c:" is -1), fits the model and writes
/// "<prefix>cmtyvv.txt" and "<prefix>graph.gexf".
///
/// @param argc number of command-line arguments
/// @param argv command-line arguments; the recognised switches are the ones
///             declared by the Env.GetIfArgPrefix* calls below
/// @return 0
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("ragm. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  // NOTE(review): Try/Catch are project macros; presumably they wrap the code
  // in between in a try/catch that reports the error -- confirm.
  Try
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input edgelist file name");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) ");
  int OptComs = Env.GetIfArgPrefixInt("-c:", -1, "The number of communities to detect (-1: detect automatically)");
  const int MinComs = Env.GetIfArgPrefixInt("-mc:", 5, "Minimum number of communities to try");
  const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 100, "Maximum number of communities to try");
  const int DivComs = Env.GetIfArgPrefixInt("-nc:", 10, "How many trials for the number of communities");
  const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 1, "Number of threads for parallelization");
  const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.3, "Alpha for backtracking line search");
  const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search");

  // load the graph; NIDNameH maps node IDs to their names
  PUNGraph G;
  TIntStrH NIDNameH;
  if (InFNm.IsStrIn(".ungraph")) {
    TFIn GFIn(InFNm);
    G = TUNGraph::Load(GFIn);
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH);
  }
  // optional label file: column 0 is the node ID, column 1 the label
  if (LabelFNm.Len() > 0) {
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      // both fields are read, so skip lines that do not have at least two
      if (Ss.Len() > 1) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());

  TVec<TIntV> EstCmtyVV;
  TAGMFast RAGM(G, 10, 10);

  if (OptComs == -1) {
    printf("finding number of communities\n");
    OptComs = RAGM.FindComsByCV(NumThreads, MaxComs, MinComs, DivComs, OutFPrx, StepAlpha, StepBeta);
  }

  RAGM.NeighborComInit(OptComs);
  // the parallel optimiser is only used for multi-threaded runs on graphs with >= 1000 edges
  if (NumThreads == 1 || G->GetEdges() < 1000) {
    RAGM.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta);
  } else {
    RAGM.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta);
  }
  RAGM.GetCmtyVV(EstCmtyVV);
  TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH);
  TAGMUtil::SaveGephi(OutFPrx + "graph.gexf", G, EstCmtyVV, 1.5, 1.5, NIDNameH);

  Catch

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  return 0;
}
Beispiel #3
0
// Counts how many of Node's neighbour IDs, plus Node's own ID, occur in
// NNodes when written out as text.
//   Node   - node whose neighbourhood (and own ID) is tested
//   NNodes - text that is searched for each ID
// Returns the number of IDs found.
// NOTE(review): the lookup is a plain substring test, so an ID such as 1
// would also be found inside "10" -- confirm the format of NNodes rules
// that out.
int Intersect(TUNGraph::TNodeI Node, TStr NNodes){
  int Hits = 0;

  // neighbours first
  for (int NbrN = 0; NbrN < Node.GetDeg(); NbrN++) {
    TInt NbrId = Node.GetNbrNId(NbrN);
    TStr NbrIdStr = NbrId.GetStr();
    if (NNodes.IsStrIn(NbrIdStr.CStr())) {
      Hits++;
    }
  }

  // then the node itself
  TInt OwnId = Node.GetId();
  TStr OwnIdStr = OwnId.GetStr();
  if (NNodes.IsStrIn(OwnIdStr.CStr())) {
    Hits++;
  }

  return Hits;
}
Beispiel #4
0
// Paints the magnifying glass: a filled ellipse centred on (ScreenX, ScreenY)
// and, when MgGlassKeyWdV is non-empty, a framed text window next to it with
// the nearest point's name, the document count and the keywords.
//   Gks           - drawing surface that receives all output
//   KeyWdFontSize - size of the bold font used for the window text
void TVizMapContext::PaintMgGlass(PGks Gks, const int& KeyWdFontSize) {
    // ellipse radii: glass size relative to the zoom rectangle, scaled to the canvas
    TFltRect ZoomRect = GetZoomRect();
    const int RadiusX = TFlt::Round((MgGlassSize/ZoomRect.GetXLen()) * Gks->GetWidth());
    const int RadiusY = TFlt::Round((MgGlassSize/ZoomRect.GetYLen()) * Gks->GetHeight());
    Gks->SetBrush(TGksBrush::New(ColorMgGlass));
    Gks->FillEllipse(ScreenX-RadiusX, ScreenY-RadiusY, ScreenX+RadiusX, ScreenY+RadiusY);

    // nothing more to draw without keywords
    if (!(MgGlassKeyWdV.Len() > 0)) { return; }

    // the font is set first because the text wrapping and measuring below go through Gks
    Gks->SetFont(TGksFont::New("ARIAL", KeyWdFontSize, TGksColor::GetBlack(), TFSet()|gfsBold));
    TStr KeyWdStr = Gks->BreakTxt(MgGlassKeyWdV, ", ", ",", MgGlassWindowWidth);

    // optional first line: name of the nearest point
    TStr NearPointStr;
    if (NearPointN != -1) {
        PVizMapPoint NearPoint = VizMapFrame->GetPoint(NearPointN);
        if (NearPoint->IsPointNm()) {
            TStr NearPointNm = NearPoint->GetPointNm();
            if (NearPointNm.IsStrIn("[[")) {
                // keep only the part in front of the "[[" marker
                // NOTE(review): the result for "[[" at index 0 or 1 depends on
                // how TStr::Left treats non-positive arguments -- confirm.
                const int MarkerPos = NearPointNm.SearchStr("[[");
                NearPointNm = NearPointNm.Left(MarkerPos - 1);
            }
            NearPointStr = Gks->BreakTxt(NearPointNm, " ", "", MgGlassWindowWidth, 1);
            NearPointStr.DelChAll('\n');
            NearPointStr += "\n";
        }
    }

    // full window text: [point name] + document count + keywords
    TStr DocCountStr = "#documents = " + MgGlassPoints.GetStr() + "\n";
    TStr WndTxt = NearPointStr + DocCountStr + KeyWdStr;

    // window size with a 3 pixel margin on every side
    const int WndWidth = Gks->GetTxtWidth(WndTxt) + 6;
    const int WndHeight = Gks->GetTxtHeight(WndTxt) + 6;

    // place the window below/right of the glass; flip to the other side if it would leave the canvas
    int PosX = ScreenX + 20;
    if (PosX + WndWidth > Gks->GetWidth()) {
        PosX = ScreenX - 20 - WndWidth;
    }
    int PosY = ScreenY + 20;
    if (PosY + WndHeight > Gks->GetHeight()) {
        PosY = ScreenY - 20 - WndHeight;
    }

    // shadow, shifted by 5 pixels
    Gks->SetBrush(TGksBrush::New(ColorMgGlassWndShadow));
    Gks->FillRect(PosX + 5, PosY + 5, PosX + WndWidth + 5, PosY + WndHeight + 5);
    // window background and frame
    Gks->SetBrush(TGksBrush::New(ColorMgGlassWnd));
    Gks->SetPen(TGksPen::New(ColorMgGlassWndFrm));
    Gks->Rectangle(PosX, PosY, PosX + WndWidth, PosY + WndHeight);
    // text inside the margin
    Gks->PutTxt(WndTxt, PosX+3, PosY+3);
}
Beispiel #5
0
// Draws the name of a point centred on (X, Y) and returns the rectangle
// that is reported for the drawn text.
//   Gks              - drawing surface
//   Point            - point whose name and weight are used
//   X, Y             - centre of the text block on the screen
//   PointFontSize    - base font size
//   PointNmFontScale - multiplied by the point weight and added to the base size
//   SelPointP        - selects the "selected point" font colour
//   IsCatP           - not used by this implementation
TFltRect TVizMapContext::PaintPointNm(PGks Gks, PVizMapPoint Point, const int& X, 
        const int& Y, const int& PointFontSize, const int& PointNmFontScale, 
        const bool& SelPointP, const bool& IsCatP) {

    // fetch the name, turn underscores into spaces and drop a "[[" suffix
    TStr CleanNm = Point->GetPointNm();
    CleanNm.ChangeChAll('_', ' ');
    if (CleanNm.IsStrIn("[[")) {
        // NOTE(review): the result for "[[" at index 0 or 1 depends on how
        // TStr::Left treats non-positive arguments -- confirm.
        const int MarkerPos = CleanNm.SearchStr("[[");
        CleanNm = CleanNm.Left(MarkerPos - 1);
    }

    // font: colour by selection state, size grows with the point weight
    TGksColor TxtColor = SelPointP ? ColorSelPointFont : ColorPointFont;
    const int TxtSize = PointFontSize + TFlt::Round(Point->GetWgt()*PointNmFontScale);
    Gks->SetFont(TGksFont::New("ARIAL", TxtSize, TxtColor));

    // wrap the name to the configured width and line limit
    TStr WrappedNm = Gks->BreakTxt(CleanNm, " ", "", PointNmWidth, PointNmMxLines);

    // half extents of the whole text block
    const int HalfW = Gks->GetTxtWidth(WrappedNm) / 2;
    const int HalfH = Gks->GetTxtHeight(WrappedNm) / 2;
    const int LeftX = X - HalfW;
    const int TopY = Y - HalfH;

    // draw line by line, each line centred horizontally within the block
    TStrV LineV;
    WrappedNm.SplitOnAllCh('\n', LineV);
    int PenY = TopY;
    for (int LineN = 0; LineN < LineV.Len(); LineN++) {
        TStr& Line = LineV[LineN];
        const int HalfLineW = Gks->GetTxtWidth(Line) / 2;
        const int LineH = Gks->GetTxtHeight(Line);
        Gks->PutTxt(Line, LeftX + (HalfW - HalfLineW), PenY);
        // consecutive lines are drawn 3 pixels closer than their measured height
        PenY += LineH-3;
    }

    // the bottom edge is pulled up by the same 3 pixels per line
    const int RightX = X + HalfW;
    const int BottomY = Y + HalfH - LineV.Len()*3;
    return TFltRect(LeftX, TopY, RightX, BottomY);
}
Beispiel #6
0
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("cesna. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "./1912.edges", "Input edgelist file name");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) ");
  const TStr AttrFNm = Env.GetIfArgPrefixStr("-a:", "./1912.nodefeat", "Input node attribute file name");
  const TStr ANameFNm = Env.GetIfArgPrefixStr("-n:", "./1912.nodefeatnames", "Input file name for node attribute names");
  int OptComs = Env.GetIfArgPrefixInt("-c:", 10, "The number of communities to detect (-1: detect automatically)");
  const int MinComs = Env.GetIfArgPrefixInt("-mc:", 3, "Minimum number of communities to try");
  const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 20, "Maximum number of communities to try");
  const int DivComs = Env.GetIfArgPrefixInt("-nc:", 5, "How many trials for the number of communities");
  const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 4, "Number of threads for parallelization");
  const double AttrWeight = Env.GetIfArgPrefixFlt("-aw:", 0.5, "We maximize (1 - aw) P(Network) + aw * P(Attributes)");
  const double LassoWeight = Env.GetIfArgPrefixFlt("-lw:", 1.0, "Weight for l-1 regularization on learning the logistic model parameters");
  const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.05, "Alpha for backtracking line search");
  const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search");
  const double MinFeatFrac = Env.GetIfArgPrefixFlt("-mf:", 0.0, "If the fraction of nodes with positive values for an attribute is smaller than this, we ignore that attribute");

#ifdef USE_OPENMP
  omp_set_num_threads(NumThreads);
#endif
  PUNGraph G;
  TIntStrH NIDNameH;
  TStrHash<TInt> NodeNameH;
  TVec<TFltV> Wck;
  TVec<TIntV> EstCmtyVV;
  if (InFNm.IsStrIn(".ungraph")) {
    TFIn GFIn(InFNm);
    G = TUNGraph::Load(GFIn);
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NodeNameH);
    NIDNameH.Gen(NodeNameH.Len());
    for (int s = 0; s < NodeNameH.Len(); s++) { NIDNameH.AddDat(s, NodeNameH.GetKey(s)); }

  }
  if (LabelFNm.Len() > 0) {
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());

  //load attribute
  TIntV NIDV;
  G->GetNIdV(NIDV);
  THash<TInt, TIntV> RawNIDAttrH, NIDAttrH;
  TIntStrH RawFeatNameH, FeatNameH;
  if (ANameFNm.Len() > 0) {
    TSsParser Ss(ANameFNm, ssfTabSep);
    while (Ss.Next()) {
      if (Ss.Len() > 0) { RawFeatNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }

  TCesnaUtil::LoadNIDAttrHFromNIDKH(NIDV, AttrFNm, RawNIDAttrH, NodeNameH);
  TCesnaUtil::FilterLowEntropy(RawNIDAttrH, NIDAttrH, RawFeatNameH, FeatNameH, MinFeatFrac);

  TExeTm RunTm;
  TCesna CS(G, NIDAttrH, 10, 10);
  
  if (OptComs == -1) {
    printf("finding number of communities\n");
    OptComs = CS.FindComs(NumThreads, MaxComs, MinComs, DivComs, "", false, 0.1, StepAlpha, StepBeta);
  }

  CS.NeighborComInit(OptComs);
  CS.SetWeightAttr(AttrWeight);
  CS.SetLassoCoef(LassoWeight);
  if (NumThreads == 1 || G->GetEdges() < 1000) {
    CS.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta);
  } else {
    CS.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta);
  }
  CS.GetCmtyVV(EstCmtyVV, Wck);
  TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH);
  FILE* F = fopen((OutFPrx + "weights.txt").CStr(), "wt");
  if (FeatNameH.Len() == Wck[0].Len()) {
    fprintf(F, "#");
    for (int k = 0; k < FeatNameH.Len(); k++) {
      fprintf(F, "%s", FeatNameH[k].CStr());
      if (k < FeatNameH.Len() - 1) { fprintf(F, "\t"); }
    }
    fprintf(F, "\n");
  }
  for (int c = 0; c < Wck.Len(); c++) {
    for (int k = 0; k < Wck[c].Len(); k++) {
      fprintf(F, "%f", Wck[c][k].Val);
      if (k < Wck[c].Len() - 1) { fprintf(F, "\t"); }
    }
    fprintf(F, "\n");
  }
  fclose(F);

  Catch

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  return 0;
}