예제 #1
0
// Test node, edge creation
void ManipulateNodesEdges() {
  int NNodes = 10000;
  int NEdges = 100000;
  const char *FName = "demo.net.dat";

  TPt <TNodeEDatNet<TInt, TInt> > Net;
  TPt <TNodeEDatNet<TInt, TInt> > Net1;
  TPt <TNodeEDatNet<TInt, TInt> > Net2;
  int i;
  int n;
  int NCount;
  int ECount1;
  int ECount2;
  int x,y;
  bool t;

  Net = TNodeEDatNet<TInt, TInt>::New();
  t = Net->Empty();

  // create the nodes
  for (i = 0; i < NNodes; i++) {
    Net->AddNode(i);
  }
  t = Net->Empty();
  n = Net->GetNodes();

  // create random edges
  NCount = NEdges;
  while (NCount > 0) {
    x = (long) (drand48() * NNodes);
    y = (long) (drand48() * NNodes);
    // Net->GetEdges() is not correct for the loops (x == y),
    // skip the loops in this test
    if (x != y  &&  !Net->IsEdge(x,y)) {
      n = Net->AddEdge(x, y);
      NCount--;
    }
  }
  PrintNStats("ManipulateNodesEdges:Net", Net);

  // get all the nodes
  NCount = 0;
  for (TNodeEDatNet<TInt, TInt>::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) {
    NCount++;
  }

  // get all the edges for all the nodes
  ECount1 = 0;
  for (TNodeEDatNet<TInt, TInt>::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) {
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      ECount1++;
    }
  }

  // get all the edges directly
  ECount2 = 0;
  for (TNodeEDatNet<TInt, TInt>::TEdgeI EI = Net->BegEI(); EI < Net->EndEI(); EI++) {
    ECount2++;
  }
  printf("network ManipulateNodesEdges:Net, nodes %d, edges1 %d, edges2 %d\n",
      NCount, ECount1, ECount2);

  // assignment
  Net1 = TNodeEDatNet<TInt, TInt>::New();
  *Net1 = *Net;
  PrintNStats("ManipulateNodesEdges:Net1",Net1);

  // save the network
  {
    TFOut FOut(FName);
    Net->Save(FOut);
    FOut.Flush();
  }

  // load the network
  {
    TFIn FIn(FName);
    Net2 = TNodeEDatNet<TInt, TInt>::Load(FIn);
  }
  PrintNStats("ManipulateNodesEdges:Net2",Net2);

  // remove all the nodes and edges
  for (i = 0; i < NNodes; i++) {
    n = Net->GetRndNId();
    Net->DelNode(n);
  }
  PrintNStats("ManipulateNodesEdges:Net",Net);

  Net1->Clr();
  PrintNStats("ManipulateNodesEdges:Net1",Net1);
}
예제 #2
0
파일: demo-TNGraph.cpp 프로젝트: Accio/snap
// Demo of TNGraph: builds a random directed graph, walks it with the node and
// edge iterators, then exercises assignment, save/load, node deletion and
// Clr(), printing statistics after each stage.
void ManipulateNodesEdges() {
  const int NNodes = 10000;
  const int NEdges = 100000;
  const char *FName = "demo.graph.dat";

  // start from a fresh graph
  PNGraph Graph = TNGraph::New();
  bool IsEmpty = Graph->Empty();  // only probed here, the value is never inspected

  // add node ids 0 .. NNodes-1
  for (int NId = 0; NId < NNodes; NId++) {
    Graph->AddNode(NId);
  }
  IsEmpty = Graph->Empty();
  int Res = Graph->GetNodes();

  // draw random endpoint pairs until NEdges new edges have been inserted;
  // Graph->GetEdges() is not correct for the loops (SrcNId == DstNId), so
  // loops are skipped in this test, and so are pairs already connected
  for (int Missing = NEdges; Missing > 0; ) {
    const int SrcNId = static_cast<long>(drand48() * NNodes);
    const int DstNId = static_cast<long>(drand48() * NNodes);
    if (SrcNId == DstNId  ||  Graph->IsEdge(SrcNId, DstNId)) {
      continue;
    }
    Res = Graph->AddEdge(SrcNId, DstNId);
    Missing--;
  }
  PrintGStats("ManipulateNodesEdges:Graph",Graph);

  // count the nodes through the node iterator
  int NodeCnt = 0;
  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NodeCnt++;
  }

  // count the edges by stepping over every out-edge position of every node
  int OutEdgeCnt = 0;
  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    int EdgeN = 0;
    while (EdgeN < NI.GetOutDeg()) {
      OutEdgeCnt++;
      EdgeN++;
    }
  }

  // count the edges through the edge iterator
  int EdgeCnt = 0;
  for (TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
    EdgeCnt++;
  }
  printf("ManipulateNodesEdges:Graph, nodes %d, edges1 %d, edges2 %d\n",
      NodeCnt, OutEdgeCnt, EdgeCnt);

  // assign the graph to a second, separately created object
  PNGraph Graph1 = TNGraph::New();
  *Graph1 = *Graph;
  PrintGStats("ManipulateNodesEdges:Graph1",Graph1);

  // save the graph; FOut leaves scope before the file is read back
  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  // load the saved graph into a third object
  PNGraph Graph2;
  {
    TFIn FIn(FName);
    Graph2 = TNGraph::Load(FIn);
  }
  PrintGStats("ManipulateNodesEdges:Graph2",Graph2);

  // delete NNodes randomly picked nodes, one per iteration
  for (int Round = 0; Round < NNodes; Round++) {
    Res = Graph->GetRndNId();
    Graph->DelNode(Res);
  }

  PrintGStats("ManipulateNodesEdges:Graph",Graph);

  // wipe the copy in one call
  Graph1->Clr();
  PrintGStats("ManipulateNodesEdges:Graph1",Graph1);
}
예제 #3
0
파일: fl.cpp 프로젝트: adobekan/qminer
// Reports whether the file FNm could be opened for reading.
// An empty name yields false without touching the file system; otherwise the
// answer is the "opened" flag filled in by the TFIn constructor.
bool TFile::Exists(const TStr& FNm){
  bool Opened = false;
  if (!FNm.Empty()) {
    TFIn Probe(FNm, Opened);
  }
  return Opened;
}
예제 #4
0
// Command-line entry point of the quotes application.
//
// Stores the parsed arguments in Env (declared outside this function), reads
// the lower-cased "-do:" action and runs the matching stage:
//   mkdataset      - reads the Spinn3r input files listed in -i: and writes
//                    post url, time, quotes and links as text lines to -o:
//   extractsubset  - saves the posts whose memes contain one of the phrases
//                    listed in -p:
//   memestoqtbs    - builds a quote base from a memes dataset
//   mkclustnet     - loads a quote base, clusters the quotes, dumps clusters
//   *memeclust*    - memeclustzarya / memeclustqtonly / memeclustqttime:
//                    builds (or loads) a quote base, then clusters and dumps
// When Env.IsEndOfRun() is true only the list of actions is printed.
//
// argc, argv: the program arguments, forwarded unchanged to TEnv.
void BigMain(int argc, char* argv[]) {
  TExeTm ExeTm;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs("QuotesApp");
  const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
  if (Env.IsEndOfRun()) {
    printf("To do:\n");
    printf("    MkDataset         : Make memes dataset (extract quotes and save txt)\n");
    printf("    ExtractSubset     : Extract a subset of memes containing particular words\n");
    printf("    MemesToQtBs       : Load memes dataset and create quote base\n");
    printf("    MkClustNet        : Build cluster network from the quote base\n");
    return;
  }
#pragma region mkdataset
  // extract quotes and links and make them into a single file
  if (ToDo == "mkdataset") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
    const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
    const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
    const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
    //// parse directly from Spinn3r
    TStr Spinn3rFNm;
    THashSet<TMd5Sig> SeenUrlSet;
    if (UrlOnlyOnce && ! UrlFNm.Empty()) {  // keep track of already seen urls (so that there are no duplicate urls)
      TFIn FIn(UrlFNm);  SeenUrlSet.Load(FIn);
    }
    FILE *F = fopen(OutFNm.CStr(), "wt");
    if (F == NULL) {
      // fopen returns NULL on failure; writing to it below would crash
      fprintf(stderr, "Cannot open output file '%s' for writing\n", OutFNm.CStr());
      return;
    }
    TFIn FIn(InFNm);
    int Items=0;
    for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
      TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
      printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
      fflush(stdout);
      for (int item = 0; QE.Next(); item++) {
        const TMd5Sig PostMd5(QE.PostUrlStr);
        if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
        if (UrlOnlyOnce) {
          if (SeenUrlSet.IsKey(PostMd5)) { continue; }
          SeenUrlSet.AddKey(PostMd5);
        }
        // one record per post: P = url, T = time, Q = quote, L = link, blank line ends it
        fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
        //if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
        fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
        for (int q = 0; q < QE.QuoteV.Len(); q++) {
          if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
            fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
        }
        for (int l = 0; l < QE.LinkV.Len(); l++) {
          fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
        fprintf(F, "\n");
        if (item>0 && item % Kilo(100) == 0) {
          QE.DumpStat();  QE.ExeTm.Tick(); }
        Items++;
      }
      printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
      fflush(stdout);
    }
    printf("all done. Saving %d post urls\n", SeenUrlSet.Len());  fflush(stdout);
    if (! SeenUrlSet.Empty()) {
      TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
      SeenUrlSet.Save(FOut);
    }
    fclose(F);
  }
#pragma endregion mkdataset

#pragma region extractsubset
  // save posts with memes containing particular words
  else if (ToDo == "extractsubset") {
    const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
    const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
    const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
    const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");

    TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
    printf("Loading %s\n", WordsFNm.CStr());
    // phrases are lower-cased as they are loaded, one per line
    { TFIn FIn(WordsFNm);
    for (TStr Ln; FIn.GetNextLn(Ln); ) {
      printf("  %s\n", Ln.GetLc().CStr());
      CatchMemeV.Add(Ln.GetLc()); }
    }
    printf("%d strings loaded\n", CatchMemeV.Len());
    TFOut FOut(OutFNm);
    TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
    for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
      // a post is saved as soon as any of its memes contains any phrase
      bool DoSave = false;
      for (int m = 0; m < Memes.MemeV.Len(); m++) {
        for (int i = 0; i < CatchMemeV.Len(); i++) {
          if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
            DoSave=true; break; }
        }
        if (DoSave) { break; }
      }
      if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
      if (posts % Mega(1) == 0) {
        printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
        FOut.Flush();
      }
    }
  }
#pragma endregion extractsubset

#pragma region memestoqtbs
  // load memes dataset (MkDataset) and create quote base
  else if (ToDo == "memestoqtbs") {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201007_201107.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "Fule with news media urls");
    const TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-l:", 4, "Min quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");
    const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20100714", "Min time of quotes, format = YYYYMMDD");
    const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20110728", "Max time of quotes, format = YYYYMMDD");
    // YYYYMMDD is cut at positions 0-3 / 4-5 / 6-7 (year / month / day);
    // NOTE(review): this presumes GetSubStr takes inclusive bounds - confirm
    TSecTm MinTm(atoi(MinTmStr.GetSubStr(0,3).CStr()),atoi(MinTmStr.GetSubStr(4,5).CStr()),atoi(MinTmStr.GetSubStr(6,7).CStr()));
    TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0,3).CStr()),atoi(MaxTmStr.GetSubStr(4,5).CStr()),atoi(MaxTmStr.GetSubStr(6,7).CStr()));

    PQuoteBs QtBs = TQuoteBs::New();
    int HashTableSize = 100; // 100 for each quarter, for one year data, use 400
    int UrlSetSize = 4 * HashTableSize;
    QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
  }
#pragma endregion memestoqtbs

#pragma region mkclustnet
  // make cluster network
  else if (ToDo == "mkclustnet") {
    TStr InQtBsNm = Env.GetIfArgPrefixStr("-i:", "", "Input quote base file name");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output network/updated QtBs filename");
    TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
    bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
    bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
    double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
    double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
    double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
    double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
    const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-mf:", 5, "Min meme frequency");
    const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");

    // Load quote base (through TZipIn when the name is recognised as a zip file)
    PQuoteBs QtBs;
    if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
    else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }

    // Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

    // Dump the clusters
    bool SkipUrl = true, FlashDisp = true;
    QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
  }
#pragma endregion mkclustnet

#pragma region memeclust
  // any action containing "memeclust": build or load the quote base, then cluster
  else if (ToDo.SearchStr(TStr("memeclust")) >= 0) {
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "201101.txt", "Input Memes dataset files");
    const TStr MediaUrlFNm = Env.GetIfArgPrefixStr("-u:", "news_media.txt", "Fule with news media urls");
    TStr Pref = Env.GetIfArgPrefixStr("-o:", "qt", "Output file name prefix");
    const int MinWrdLen = Env.GetIfArgPrefixInt("-minl:", 4, "Min quote word length");
    const int MaxWrdLen = Env.GetIfArgPrefixInt("-maxl:", 200, "Max quote word length");
    const int MinMemeFq = Env.GetIfArgPrefixInt("-f:", 5, "Min meme frequency");

    const int MinClustFq = Env.GetIfArgPrefixInt("-cf:", 50, "Min quote cluster frequency");
    TStr BlackListFNm = Env.GetIfArgPrefixStr("-b:", "quote_blacklist.txt", "Blacklist file name");
    bool IsQtBsReady = Env.GetIfArgPrefixBool("-qtbsready:", false, "Indicate whether quote base is ready and can be loaded readily");
    bool IsShglReady = Env.GetIfArgPrefixBool("-shglready:", false, "Indicate whether shingle hash table is ready");
    bool IsNetReady = Env.GetIfArgPrefixBool("-netready:", false, "Indicate whether cluster net is ready");
    double BktThresh = Env.GetIfArgPrefixFlt("-bktthresh:", 0.4, "Threshold for bad shingle bucket elimination");
    double MxTmDelay = Env.GetIfArgPrefixFlt("-delaythresh:", 5, "Max time delay between two quotes in the same cluster");
    double MxTmDev = Env.GetIfArgPrefixFlt("-devthresh:", 3, "Max time deviation for a quote to be specific rather than general");
    double RefineThresh = Env.GetIfArgPrefixFlt("-refinethresh:", 0.2, "Threshold for merging quote cluster in refining process");

    const TStr MinTmStr = Env.GetIfArgPrefixStr("-mint:", "20010101", "Min time of quotes, format = YYYYMMDD");
    const TStr MaxTmStr = Env.GetIfArgPrefixStr("-maxt:", "20200101", "Max time of quotes, format = YYYYMMDD");
    // YYYYMMDD is cut at positions 0-3 / 4-5 / 6-7 (year / month / day);
    // NOTE(review): this presumes GetSubStr takes inclusive bounds - confirm
    TSecTm MinTm(atoi(MinTmStr.GetSubStr(0,3).CStr()),atoi(MinTmStr.GetSubStr(4,5).CStr()),atoi(MinTmStr.GetSubStr(6,7).CStr()));
    TSecTm MaxTm(atoi(MaxTmStr.GetSubStr(0,3).CStr()),atoi(MaxTmStr.GetSubStr(4,5).CStr()),atoi(MaxTmStr.GetSubStr(6,7).CStr()));

    // Construct the quote base from Zarya data
    PQuoteBs QtBs = TQuoteBs::New();

    if (!IsQtBsReady) {
      int HashTableSize = 100; // 100 for each quarter, for one year data, use 400
      if (ToDo == "memeclustzarya") {
        int UrlSetSize = 4 * HashTableSize;
        QtBs->ConstructQtBsZarya(InFNm, Pref, MediaUrlFNm, MinTm, MaxTm, MinWrdLen, MinMemeFq, HashTableSize, UrlSetSize);
      } else if (ToDo == "memeclustqtonly") {
        QtBs->ConstructQtBsQtOnly(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
      } else if (ToDo == "memeclustqttime") {
        QtBs->ConstructQtBsQtTime(InFNm, Pref, MediaUrlFNm, MinWrdLen, MinMemeFq, HashTableSize);
      } else {
        printf("Please specify one of the three options for -do : memeclustzarya, memeclustqtonly, memeclustqttime!\n");
        return;
      }
    } else {
      // the quote base file name is derived from the prefix and the two thresholds
      TStr InQtBsNm = TStr::Fmt("%s-w%dmfq%d.QtBs", Pref.CStr(), MinWrdLen, MinMemeFq);
      if (TZipIn::IsZipFNm(InQtBsNm)) { TZipIn ZipIn(InQtBsNm);  QtBs = TQuoteBs::Load(ZipIn); }
      else { TFIn FIn(InQtBsNm);  QtBs = TQuoteBs::Load(FIn); }
    }

    // Cluster the quotes
    QtBs->ClusterQuotes(MinMemeFq, MinWrdLen, MaxWrdLen, BlackListFNm, Pref, IsShglReady, IsNetReady, BktThresh, MxTmDelay, MxTmDev, RefineThresh);

    // Dump the clusters
    bool SkipUrl = true, FlashDisp = true;
    QtBs->DumpQuoteClusters(MinWrdLen, MinClustFq, SkipUrl, FlashDisp, Pref);
  }
#pragma endregion memeclust
}
예제 #5
0
// Test node, edge attribute functionality.
//
// Builds a TNEANet with NNodes nodes and NEdges random edges, then for nodes
// and for edges in turn:
//   - sets int / flt / str attributes on a few ids and walks the per-attribute
//     ("vertical") iterators, counting set versus default entries;
//   - checks AttrName*I / AttrValue*I after deleting a single value
//     (DelAttrDat*) and after deleting a whole attribute (DelAttr*);
//   - overwrites the int attribute on every id, saves and reloads the graph
//     and compares the sum of the reloaded values with the expected sum.
TEST(TNEANet, ManipulateNodesEdgeAttributes) {
  int NNodes = 1000;
  int NEdges = 1000;
  const char *FName = "demo.graph.dat";

  PNEANet Graph;
  PNEANet Graph1;
  int i;
  int x, y;

  Graph = TNEANet::New();
  EXPECT_EQ(1, Graph->Empty());

  // create the nodes
  for (i = NNodes - 1; i >= 0; i--) {
    Graph->AddNode(i);
  }

  EXPECT_EQ(NNodes, Graph->GetNodes());

  // create the edges, with explicit edge ids NEdges-1 .. 0
  for (i = NEdges - 1; i >= 0; i--) {
    x = (long) (drand48() * NNodes);
    y = (long) (drand48() * NNodes);
    Graph->AddEdge(x, y, i);
  }

  // attribute names used for both nodes and edges
  TStr attr1 = "str";
  TStr attr2 = "int";
  TStr attr3 = "float";
  TStr attr4 = "default";

  // Test vertical int iterator for node 3, 50, 700, 900
  // Check if we can set defaults to 0 for Int data.
  Graph->AddIntAttrN(attr2, 0);
  Graph->AddIntAttrDatN(3, 3*2, attr2);
  Graph->AddIntAttrDatN(50, 50*2, attr2);
  Graph->AddIntAttrDatN(700, 700*2, attr2);
  Graph->AddIntAttrDatN(900, 900*2, attr2);

  EXPECT_EQ(3*2, Graph->GetNAIntI(attr2, 3).GetDat());
  EXPECT_EQ(50*2, Graph->GetNAIntI(attr2, 50).GetDat());

  // NodeId counts the entries that differ from the default, DefNodes the rest
  int NodeId = 0;
  int DefNodes = 0;
  TVec<TInt> TAIntIV = TVec<TInt>();
  for (TNEANet::TAIntI NI = Graph->BegNAIntI(attr2);
    NI < Graph->EndNAIntI(attr2); NI++) {
    if (NI.GetDat()() != 0) {
      TAIntIV.Add(NI.GetDat());
      NodeId++;
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(4, NodeId);
  EXPECT_EQ(NNodes - 4, DefNodes);
  TAIntIV.Sort();
  EXPECT_EQ(3*2, TAIntIV[0]);
  EXPECT_EQ(50*2, TAIntIV[1]);
  EXPECT_EQ(700*2, TAIntIV[2]);
  EXPECT_EQ(900*2, TAIntIV[3]);

  // Test vertical flt iterator for node 5, 50, 300, 653
  Graph->AddFltAttrDatN(5, 3.41, attr3);
  Graph->AddFltAttrDatN(50, 2.718, attr3);
  Graph->AddFltAttrDatN(300, 150.0, attr3);
  Graph->AddFltAttrDatN(653, 563, attr3);

  EXPECT_EQ(3.41, Graph->GetNAFltI(attr3, 5).GetDat());
  EXPECT_EQ(2.718, Graph->GetNAFltI(attr3, 50).GetDat());

  NodeId = 0;
  DefNodes = 0;
  TVec<TFlt> TAFltIV = TVec<TFlt>();

  // no explicit default was registered for the node flt attribute, so unset
  // entries are recognised by comparing with TFlt::Mn
  for (TNEANet::TAFltI NI = Graph->BegNAFltI(attr3);
    NI < Graph->EndNAFltI(attr3); NI++) {
    if (NI.GetDat() != TFlt::Mn) {
      NodeId++;
      TAFltIV.Add(NI.GetDat());
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(4, NodeId);
  EXPECT_EQ(NNodes - 4, DefNodes);
  TAFltIV.Sort();
  EXPECT_EQ(2.718, TAFltIV[0]);
  EXPECT_EQ(3.41, TAFltIV[1]);
  EXPECT_EQ(150.0, TAFltIV[2]);
  EXPECT_EQ(563.0, TAFltIV[3]);

  // Test vertical str iterator for node 10, 20, 400 (and 455 with "")
  Graph->AddStrAttrDatN(10, "abc", attr1);
  Graph->AddStrAttrDatN(20, "def", attr1);
  Graph->AddStrAttrDatN(400, "ghi", attr1);
  // this does not show since ""=null
  Graph->AddStrAttrDatN(455, "", attr1);

  EXPECT_EQ('c', Graph->GetNAStrI(attr1, 10).GetDat().LastCh());
  EXPECT_EQ('f', Graph->GetNAStrI(attr1, 20).GetDat().LastCh());

  NodeId = 0;
  DefNodes = 0;
  TVec<TStr> TAStrIV = TVec<TStr>();

  for (TNEANet::TAStrI NI = Graph->BegNAStrI(attr1);
    NI < Graph->EndNAStrI(attr1); NI++) {
    if (NI.GetDat() != TStr::GetNullStr()) {
      NodeId++;
      TAStrIV.Add(NI.GetDat());
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(3, NodeId);
  EXPECT_EQ(NNodes - 3, DefNodes);
  TAStrIV.Sort();
  // TODO(nkhadke): Fix hack to compare strings properly. This works for now.
  EXPECT_EQ('c', TAStrIV[0].LastCh());
  EXPECT_EQ('f', TAStrIV[1].LastCh());
  EXPECT_EQ('i', TAStrIV[2].LastCh());


  // Test vertical iterator over many types (must skip default/deleted attr)
  int NId = 55;
  Graph->AddStrAttrDatN(NId, "aaa", attr1);
  Graph->AddIntAttrDatN(NId, 3*2, attr2);
  Graph->AddFltAttrDatN(NId, 3.41, attr3);
  Graph->AddStrAttrDatN(80, "dont appear", attr4); // should not show up
  TStrV NIdAttrName;
  Graph->AttrNameNI(NId, NIdAttrName);
  int AttrLen = NIdAttrName.Len();
  NodeId = 0;
  DefNodes = 0;
  EXPECT_EQ(3, AttrLen);

  // deleting one value must hide the "int" attribute name for this node
  Graph->DelAttrDatN(NId, attr2);
  Graph->AttrNameNI(NId, NIdAttrName);
  AttrLen = NIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("int") == NIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }
  EXPECT_EQ(2, AttrLen);

  // deleting the whole "str" attribute must hide its name ...
  Graph->AddIntAttrDatN(NId, 3*2, attr2);
  Graph->DelAttrN(attr1);
  Graph->AttrNameNI(NId, NIdAttrName);
  AttrLen = NIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == NIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }
  EXPECT_EQ(2, AttrLen);

  // ... and its value "aaa" must no longer be listed for this node
  TStrV NIdAttrValue;
  Graph->AttrValueNI(NId, NIdAttrValue);
  AttrLen = NIdAttrValue.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("aaa") == NIdAttrValue[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }

  // overwrite the int attribute on every node and remember the expected sum
  int expectedTotal = 0;
  for (i = 0; i <NNodes; i++) {
    Graph->AddIntAttrDatN(i, NNodes+i, attr2);
    EXPECT_EQ(NNodes+i, Graph->GetIntAttrDatN(i, attr2));
    expectedTotal += NNodes+i;
  }

  // save, then reload into Graph1
  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    Graph1 = TNEANet::Load(FIn);
  }

  int total = 0;
  for (TNEANet::TAIntI NI = Graph1->BegNAIntI(attr2);
    NI < Graph1->EndNAIntI(attr2); NI++) {
    total += NI.GetDat();
  }

  ASSERT_EQ(expectedTotal, total);

  Graph1->Clr();

  // Test vertical int iterator for edge 3, 55, 705, 905
  Graph->AddIntAttrDatE(3, 3*2, attr2);
  Graph->AddIntAttrDatE(55, 55*2, attr2);
  Graph->AddIntAttrDatE(705, 705*2, attr2);
  Graph->AddIntAttrDatE(905, 905*2, attr2);

  EXPECT_EQ(3*2, Graph->GetEAIntI(attr2, 3).GetDat());
  EXPECT_EQ(55*2, Graph->GetEAIntI(attr2, 55).GetDat());

  // EdgeId counts the entries that differ from the default, DefEdges the rest;
  // no explicit default was registered for the edge int attribute, so unset
  // entries are recognised by comparing with TInt::Mn
  int EdgeId = 0;
  int DefEdges = 0;
  TAIntIV.Clr();
  for (TNEANet::TAIntI EI = Graph->BegEAIntI(attr2);
    EI < Graph->EndEAIntI(attr2); EI++) {
    if (EI.GetDat() != TInt::Mn) {
      TAIntIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(4, EdgeId);
  EXPECT_EQ(NEdges - 4, DefEdges);
  TAIntIV.Sort();
  EXPECT_EQ(3*2, TAIntIV[0]);
  EXPECT_EQ(55*2, TAIntIV[1]);
  EXPECT_EQ(705*2, TAIntIV[2]);
  EXPECT_EQ(905*2, TAIntIV[3]);

  // Test vertical flt iterator for edge 5, 50, 300, 653
  Graph->AddFltAttrE(attr3, 0.00);
  Graph->AddFltAttrDatE(5, 4.41, attr3);
  Graph->AddFltAttrDatE(50, 3.718, attr3);
  Graph->AddFltAttrDatE(300, 151.0, attr3);
  Graph->AddFltAttrDatE(653, 654, attr3);

  EXPECT_EQ(4.41, Graph->GetEAFltI(attr3, 5).GetDat());
  EXPECT_EQ(3.718, Graph->GetEAFltI(attr3, 50).GetDat());

  EdgeId = 0;
  DefEdges = 0;
  TAFltIV.Clr();

  for (TNEANet::TAFltI EI = Graph->BegEAFltI(attr3);
    EI < Graph->EndEAFltI(attr3); EI++) {
    // Check if defaults are set to 0.
    if (EI.GetDat() != 0.00) {
      TAFltIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(4, EdgeId);
  EXPECT_EQ(NEdges - 4, DefEdges);
  TAFltIV.Sort();
  EXPECT_EQ(3.718, TAFltIV[0]);
  EXPECT_EQ(4.41, TAFltIV[1]);
  EXPECT_EQ(151.0, TAFltIV[2]);
  EXPECT_EQ(654.0, TAFltIV[3]);

  // Test vertical str iterator for edge 10, 20, 400 (and 455 with "")
  Graph->AddStrAttrDatE(10, "abc", attr1);
  Graph->AddStrAttrDatE(20, "def", attr1);
  Graph->AddStrAttrDatE(400, "ghi", attr1);
  // this does not show since ""=null
  Graph->AddStrAttrDatE(455, "", attr1);

  EXPECT_EQ('c', Graph->GetEAStrI(attr1, 10).GetDat().LastCh());
  EXPECT_EQ('f', Graph->GetEAStrI(attr1, 20).GetDat().LastCh());

  EdgeId = 0;
  DefEdges = 0;
  TAStrIV.Clr();

  for (TNEANet::TAStrI EI = Graph->BegEAStrI(attr1);
    EI < Graph->EndEAStrI(attr1); EI++) {
    if (EI.GetDat() != TStr::GetNullStr()) {
      TAStrIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(3, EdgeId);
  EXPECT_EQ(NEdges - 3, DefEdges);
  TAStrIV.Sort();
  // TODO(nkhadke): Fix hack to compare strings properly. This works for now.
  EXPECT_EQ('c', TAStrIV[0].LastCh());
  EXPECT_EQ('f', TAStrIV[1].LastCh());
  EXPECT_EQ('i', TAStrIV[2].LastCh());

  // Test vertical iterator over many types (must skip default/deleted attr)
  int EId = 55;
  Graph->AddStrAttrDatE(EId, "aaa", attr1);
  Graph->AddIntAttrDatE(EId, 3*2, attr2);
  Graph->AddFltAttrDatE(EId, 3.41, attr3);
  Graph->AddStrAttrDatE(80, "dont appear", attr4); // should not show up

  TStrV EIdAttrName;
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  EXPECT_EQ(3, AttrLen);

  // deleting one value must hide the "int" attribute name for this edge
  Graph->DelAttrDatE(EId, attr2);
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("int") == EIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // deleting the whole "str" attribute must hide its name; the name list is
  // compared with the attribute name, mirroring the node check above
  Graph->AddIntAttrDatE(EId, 3*2, attr2);
  Graph->DelAttrE(attr1);
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == EIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // ... and its value "aaa" must no longer be listed for this edge
  TStrV EIdAttrValue;
  Graph->AttrValueEI(EId, EIdAttrValue);
  AttrLen = EIdAttrValue.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("aaa") == EIdAttrValue[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // overwrite the int attribute on every edge and remember the expected sum
  expectedTotal = 0;
  for (i = 0; i <NEdges; i++) {
    Graph->AddIntAttrDatE(i, NEdges+i, attr2);
    EXPECT_EQ(NEdges+i, Graph->GetIntAttrDatE(i, attr2));
    expectedTotal += NEdges+i;
  }

  // save, drop the in-memory graph, then reload into Graph1
  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
    Graph->Clr();
  }

  {
    TFIn FIn(FName);
    Graph1 = TNEANet::Load(FIn);
  }

  // sum the reloaded EDGE values; the node iterators would only give the same
  // total because NNodes == NEdges and would not verify the edge data at all
  total = 0;
  for (TNEANet::TAIntI EI = Graph1->BegEAIntI(attr2);
    EI < Graph1->EndEAIntI(attr2); EI++) {
    total += EI.GetDat();
  }

  EXPECT_EQ(expectedTotal, total);

  //Graph1->Dump();
  Graph1->Clr();
}
예제 #6
0
// Checks TNEANet node/edge creation, the three iterator styles, degree
// bookkeeping, assignment, save/load round trip, node deletion and Clr().
TEST(TNEANet, ManipulateNodesEdges) {
  const int NNodes = 1000;
  const int NEdges = 100000;
  const char *FName = "test.graph.dat";

  // a freshly created graph reports itself as empty
  PNEANet Graph = TNEANet::New();
  EXPECT_EQ(1,Graph->Empty());

  // add node ids 0 .. NNodes-1
  for (int NId = 0; NId < NNodes; NId++) {
    Graph->AddNode(NId);
  }
  EXPECT_EQ(0,Graph->Empty());
  EXPECT_EQ(NNodes,Graph->GetNodes());

  // add NEdges edges between random endpoints (no filtering of loops or
  // repeated pairs; every draw is inserted)
  int Res;
  for (int Left = NEdges; Left > 0; Left--) {
    const int SrcNId = static_cast<long>(drand48() * NNodes);
    const int DstNId = static_cast<long>(drand48() * NNodes);
    Res = Graph->AddEdge(SrcNId, DstNId);
  }

  EXPECT_EQ(NEdges,Graph->GetEdges());

  EXPECT_EQ(0,Graph->Empty());
  EXPECT_EQ(1,Graph->IsOk());

  // every inserted id is a node ...
  for (int NId = 0; NId < NNodes; NId++) {
    EXPECT_EQ(1,Graph->IsNode(NId));
  }

  // ... and ids at or beyond NNodes are not
  EXPECT_EQ(0,Graph->IsNode(NNodes));
  EXPECT_EQ(0,Graph->IsNode(NNodes+1));
  EXPECT_EQ(0,Graph->IsNode(2*NNodes));

  // nodes iterator
  int Seen = 0;
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    Seen++;
  }
  EXPECT_EQ(NNodes,Seen);

  // edges per node iterator
  Seen = 0;
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    int EdgeN = 0;
    while (EdgeN < NI.GetOutDeg()) {
      Seen++;
      EdgeN++;
    }
  }
  EXPECT_EQ(NEdges,Seen);

  // edges iterator
  Seen = 0;
  for (TNEANet::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
    Seen++;
  }
  EXPECT_EQ(NEdges,Seen);

  // node degree: the total degree is the sum of in- and out-degree
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    const int Total = NI.GetDeg();
    const int In = NI.GetInDeg();
    const int Out = NI.GetOutDeg();

    EXPECT_EQ(Total,In+Out);
  }

  // assignment into a separately created graph
  PNEANet Graph1 = TNEANet::New();
  *Graph1 = *Graph;

  EXPECT_EQ(NNodes,Graph1->GetNodes());
  EXPECT_EQ(NEdges,Graph1->GetEdges());
  EXPECT_EQ(0,Graph1->Empty());
  EXPECT_EQ(1,Graph1->IsOk());

  // saving and loading; FOut leaves scope before the file is read back
  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  PNEANet Graph2;
  {
    TFIn FIn(FName);
    Graph2 = TNEANet::Load(FIn);
  }

  EXPECT_EQ(NNodes,Graph2->GetNodes());
  EXPECT_EQ(NEdges,Graph2->GetEdges());
  EXPECT_EQ(0,Graph2->Empty());
  EXPECT_EQ(1,Graph2->IsOk());

  // remove all the nodes and edges by deleting one random node per round
  for (int Round = 0; Round < NNodes; Round++) {
    Res = Graph->GetRndNId();
    Graph->DelNode(Res);
  }

  EXPECT_EQ(0,Graph->GetNodes());
  EXPECT_EQ(0,Graph->GetEdges());

  EXPECT_EQ(1,Graph->IsOk());
  EXPECT_EQ(1,Graph->Empty());

  // clearing the copy empties it in one call
  Graph1->Clr();

  EXPECT_EQ(0,Graph1->GetNodes());
  EXPECT_EQ(0,Graph1->GetEdges());

  EXPECT_EQ(1,Graph1->IsOk());
  EXPECT_EQ(1,Graph1->Empty());
}
예제 #7
0
파일: test-THash.cpp 프로젝트: Zala/qminer
// Table manipulations on TStrIntH (string key -> int data).
//
// Inserts NElems entries whose key is the decimal text of n and whose data is
// n+1, visiting n with stride d = Prime(NElems). The table is then verified
// through successive keys, strided ("distant") keys, the iterator and the key
// id walk; after deleting every DDist-th key it is re-verified directly, via
// an assigned copy and via a save/load round trip; finally the table is
// emptied by random deletion and the copy by Clr().
// Key and data sums are used as checksums throughout.
TEST(TStrIntH, ManipulateTable) {
  const int64 NElems = 1000000;
  int DDist = 10;
  const char *FName = "test.hashstr.dat";
  TStrIntH TableStr;
  TStrIntH TableStr1;
  TStrIntH TableStr2;
  int i;
  int d;
  int n;
  int Id;
  int Key;
  TStr KeyStr;
  int64 KeySumVal;
  int64 DatSumVal;
  int64 KeySum;
  int64 DatSum;
  int64 KeySumDel;
  int64 DatSumDel;
  int DelCount;
  int Count;
  char s[32];

  // add table elements
  d = Prime(NElems);
  n = d;
  KeySumVal = 0;
  DatSumVal = 0;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    TableStr.AddDat(Str,n+1);
    KeySumVal += n;
    DatSumVal += (n+1);
    //printf("add %d %d\n", n, n+1);
    n = (n + d) % NElems;
  }
  EXPECT_EQ(0,TableStr.Empty());
  EXPECT_EQ(NElems,TableStr.Len());

  // keys must be exactly 0 .. NElems-1 and data exactly 1 .. NElems
  EXPECT_EQ(0,(NElems-1)*(NElems)/2 - KeySumVal);
  EXPECT_EQ(0,(NElems)*(NElems+1)/2 - DatSumVal);

  // verify elements by successive keys
  KeySum = 0;
  DatSum = 0;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",i);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    //printf("vrfy %d %s %d %s %d %d\n", i, Str.CStr(), Id, KeyStr.CStr(), Key, (int) TableStr.GetDat(KeyStr));
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by distant keys: look up n, which jumps by d each round.
  // The start is reduced modulo NElems so it is always one of the inserted
  // keys; with the same stride d as the insertion loop, NElems rounds visit
  // every key once, so the checksums must match again.
  KeySum = 0;
  DatSum = 0;
  n = Prime(d) % NElems;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
    n = (n + d) % NElems;
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    KeySum += Key;
    DatSum += It.GetDat();
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by key index
  KeySum = 0;
  DatSum = 0;
  Id = TableStr.FFirstKeyId();
  while (TableStr.FNextKeyId(Id)) {
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // delete elements: every DDist-th key, remembering what was removed
  DelCount = 0;
  KeySumDel = 0;
  DatSumDel = 0;
  for (n = 0; n < NElems; n += DDist) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    //printf("del %d %d %d\n", n, Id, (int) TableStr[Id]);
    KeySumDel += n;
    DatSumDel += TableStr[Id];
    TableStr.DelKeyId(Id);
    DelCount++;
  }
  EXPECT_EQ(0,TableStr.Empty());
  EXPECT_EQ(NElems-DelCount,TableStr.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // assignment
  TableStr1 = TableStr;
  EXPECT_EQ(0,TableStr1.Empty());
  EXPECT_EQ(NElems-DelCount,TableStr1.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr1.BegI(); It < TableStr1.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // saving and loading
  {
    TFOut FOut(FName);
    TableStr.Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    TableStr2.Load(FIn);
  }

  EXPECT_EQ(NElems-DelCount,TableStr2.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr2.BegI(); It < TableStr2.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // remove all elements, one randomly chosen key id per round
  // (Count still holds the number of remaining elements from the loop above)
  for (i = 0; i < Count; i++) {
    Id = TableStr.GetRndKeyId(TInt::Rnd, 0.5);
    TableStr.DelKeyId(Id);
  }
  EXPECT_EQ(0,TableStr.Len());
  EXPECT_EQ(1,TableStr.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);

  // clear the table
  TableStr1.Clr();
  EXPECT_EQ(0,TableStr1.Len());
  EXPECT_EQ(1,TableStr1.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr1.BegI(); It < TableStr1.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);
}