Exemplo n.º 1
0
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Memetracker Converter. Build: %s, %s. Time: %s",
        __TIME__, __DATE__, TExeTm::GetCurTm()), 1, true);
  const TStr netinfInputFile = Env.GetIfArgPrefixStr(
      "-netinfInputFile=",
      "data/memetracker/InfoNet5000Q1000NEXP.txt",
      "File containing the inferred network, used as underlying structure");
  const TStr clusteredCascadesInputFile = Env.GetIfArgPrefixStr(
      "-clusteredCascadesInputFile=",
      "data/memetracker/clust-qt08080902w3mfq5.txt",
      "File containing clustered instances of memes and their path on the web");
  const TInt cascadeInputLine = Env.GetIfArgPrefixInt(
      "-cascadeInputLine=", 75926,
      "Specific cascade to read from clusteredCascadesInputFile");

  const TStr snapNetworkOutputFile = Env.GetIfArgPrefixStr(
      "-snapNetworkOutputFile=", "data/memetracker/memeNetwork.dat",
      "File where to write the output network as a snap binary");
  const TStr groundTruthOutputFile = Env.GetIfArgPrefixStr(
      "-groundTruthOutputFile=", "data/memetracker/memeGroundTruth.txt",
      "File where to write the ground truth");

  // Build the network from the most popular websites.
  ifstream inputfile(netinfInputFile.CStr());
  string line;
  getline(inputfile, line); // read header line from file
  map<string, unsigned int> urlToNodeIdHash;
  PUNGraph graph = PUNGraph::New();
  unsigned int nodeId = 0;
  while (getline(inputfile, line)) {
    istringstream iss(line);
    string idx, src, dst;
    iss >> idx >> src >> dst;
    if (urlToNodeIdHash.find(src) == urlToNodeIdHash.end()) {
      urlToNodeIdHash[src] = nodeId;
      graph->AddNode(nodeId);
      nodeId++;
    }
    if (urlToNodeIdHash.find(dst) == urlToNodeIdHash.end()) {
      urlToNodeIdHash[dst] = nodeId;
      graph->AddNode(nodeId);
      nodeId++;
    }
    graph->AddEdge(urlToNodeIdHash[src], urlToNodeIdHash[dst]);
  }
  // Save network.
  { TFOut FOut(snapNetworkOutputFile); graph->Save(FOut); }

  // Read one memetracker entry.
  ifstream memetracker(clusteredCascadesInputFile.CStr());
  for (int i = 0; i < cascadeInputLine; ++i)
    getline(memetracker, line);
  getline(memetracker, line);
  int entries, dummyInt;
  istringstream iss(line);
  iss >> dummyInt >> entries;

  cout << "Building cascade for ";
  while (!iss.eof()) {
    string phrase;
    iss >> phrase;
    cout << phrase << " ";
  }
  cout << endl;

  // Dump cascade to some file.
  ofstream dumpStream;
  dumpStream.open(groundTruthOutputFile.CStr());

  string dummy, url;
  map<string, unsigned int> infectionTimeHash;
  unsigned int infectionTime = 0;
  for (int i = 0; i < entries; ++i) {
    // Read through each "infected" URL.
    getline(memetracker, line);
    istringstream iss(line);
    // These fields of the cascade entry ar not important.
    iss >> dummy >> dummy >> dummy >> dummy;
    iss >> url;
    // Parse the URL and identify the host website.
    uri::uri instance(url);
    assert(instance.is_valid());
    // If node not in network or already infected, skip.
    if (urlToNodeIdHash.find(instance.host()) == urlToNodeIdHash.end() ||
        infectionTimeHash.find(instance.host()) != infectionTimeHash.end())
      continue;
    infectionTimeHash[instance.host()] = infectionTime++;
    // Dump as pair of <nodeId, infectionTime>.
    dumpStream << urlToNodeIdHash[instance.host()] << " " <<
      infectionTimeHash[instance.host()] << endl;
  }
  return 0;
}
Exemplo n.º 2
0
// Demonstrates node and edge manipulation on an undirected graph: builds a
// random graph, walks it with the node and edge iterators, copies it, saves
// and reloads it through "test.graph", and finally empties it, printing
// statistics along the way.
void ManipulateNodesEdges() {
  const int NNodes = 10000;
  const int NEdges = 100000;
  const char *FName = "test.graph";

  // A freshly created graph; the emptiness flag is queried for illustration.
  PUNGraph Graph = TUNGraph::New();
  bool IsEmpty = Graph->Empty();

  // Add nodes with ids 0 .. NNodes-1.
  for (int NId = 0; NId < NNodes; NId++) {
    Graph->AddNode(NId);
  }
  IsEmpty = Graph->Empty();
  int Result = Graph->GetNodes();

  // Add NEdges distinct random edges. Graph->GetEdges() is not correct for
  // the loops (x == y), so loops are skipped in this test.
  int EdgesLeft = NEdges;
  while (EdgesLeft > 0) {
    const int SrcNId = static_cast<int>(drand48() * NNodes);
    const int DstNId = static_cast<int>(drand48() * NNodes);
    if (SrcNId == DstNId || Graph->IsEdge(SrcNId, DstNId)) {
      continue;
    }
    Result = Graph->AddEdge(SrcNId, DstNId);
    EdgesLeft--;
  }
  PrintGStats("ManipulateNodesEdges:Graph",Graph);

  // Count the nodes through the node iterator.
  int NodeCnt = 0;
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NodeCnt++;
  }

  // Count the edges through the nodes: every edge is seen from both of its
  // endpoints, so the degree sum is halved.
  int EdgeCntByNode = 0;
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    EdgeCntByNode += NI.GetOutDeg();
  }
  EdgeCntByNode /= 2;

  // Count the edges through the edge iterator.
  int EdgeCntDirect = 0;
  for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
    EdgeCntDirect++;
  }
  printf("graph ManipulateNodesEdges:Graph, nodes %d, edges1 %d, edges2 %d\n",
      NodeCnt, EdgeCntByNode, EdgeCntDirect);

  // Copy the graph contents into a second graph object.
  PUNGraph Graph1 = TUNGraph::New();
  *Graph1 = *Graph;
  PrintGStats("ManipulateNodesEdges:Graph1",Graph1);

  // Write the graph to disk; the stream is closed at the end of the scope.
  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  // Read it back into a third graph object.
  PUNGraph Graph2;
  {
    TFIn FIn(FName);
    Graph2 = TUNGraph::Load(FIn);
  }
  PrintGStats("ManipulateNodesEdges:Graph2",Graph2);

  // Empty the original graph by deleting one randomly chosen node per step.
  for (int Step = 0; Step < NNodes; Step++) {
    Result = Graph->GetRndNId();
    Graph->DelNode(Result);
  }

  PrintGStats("ManipulateNodesEdges:Graph",Graph);

  // Empty the copy in a single call.
  Graph1->Clr();
  PrintGStats("ManipulateNodesEdges:Graph1",Graph1);
}
Exemplo n.º 3
0
// Verifies node and edge bookkeeping of TUNGraph: creation, random edge
// insertion, iterators, degrees, assignment, save/load and deletion.
TEST(TUNGraph, ManipulateNodesEdges) {
  const int NNodes = 10000;
  const int NEdges = 100000;
  const char *FName = "test.graph";

  // A new graph starts out empty.
  PUNGraph Graph = TUNGraph::New();
  EXPECT_TRUE(Graph->Empty());

  // Add nodes with ids 0 .. NNodes-1.
  for (int NId = 0; NId < NNodes; NId++) {
    Graph->AddNode(NId);
  }
  EXPECT_FALSE(Graph->Empty());
  EXPECT_EQ(NNodes,Graph->GetNodes());

  // Add NEdges distinct random edges. Graph->GetEdges() is not correct for
  // the loops (x == y), so loops are skipped in this test.
  int EdgesLeft = NEdges;
  while (EdgesLeft > 0) {
    const int SrcNId = static_cast<int>(drand48() * NNodes);
    const int DstNId = static_cast<int>(drand48() * NNodes);
    if (SrcNId == DstNId || Graph->IsEdge(SrcNId, DstNId)) {
      continue;
    }
    Graph->AddEdge(SrcNId, DstNId);
    EdgesLeft--;
  }

  EXPECT_EQ(NEdges,Graph->GetEdges());

  EXPECT_FALSE(Graph->Empty());
  EXPECT_TRUE(Graph->IsOk());

  // Every id that was added is a node; ids beyond the range are not.
  for (int NId = 0; NId < NNodes; NId++) {
    EXPECT_TRUE(Graph->IsNode(NId));
  }

  EXPECT_FALSE(Graph->IsNode(NNodes));
  EXPECT_FALSE(Graph->IsNode(NNodes+1));
  EXPECT_FALSE(Graph->IsNode(2*NNodes));

  // The node iterator visits every node once.
  int NodeCnt = 0;
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NodeCnt++;
  }
  EXPECT_EQ(NNodes,NodeCnt);

  // Summed over all nodes, each edge is counted from both endpoints.
  int DegSum = 0;
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    DegSum += NI.GetOutDeg();
  }
  EXPECT_EQ(NEdges*2,DegSum);

  // The edge iterator visits every edge once.
  int EdgeCnt = 0;
  for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
    EdgeCnt++;
  }
  EXPECT_EQ(NEdges,EdgeCnt);

  // In an undirected graph degree, in-degree and out-degree coincide.
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    const int Deg = NI.GetDeg();
    const int InDeg = NI.GetInDeg();
    const int OutDeg = NI.GetOutDeg();

    EXPECT_EQ(Deg,InDeg);
    EXPECT_EQ(Deg,OutDeg);
  }

  // Assigning the graph contents yields an equally sized, consistent graph.
  PUNGraph Graph1 = TUNGraph::New();
  *Graph1 = *Graph;

  EXPECT_EQ(NNodes,Graph1->GetNodes());
  EXPECT_EQ(NEdges,Graph1->GetEdges());
  EXPECT_FALSE(Graph1->Empty());
  EXPECT_TRUE(Graph1->IsOk());

  // Round trip through a file; each stream is closed when its scope ends.
  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  PUNGraph Graph2;
  {
    TFIn FIn(FName);
    Graph2 = TUNGraph::Load(FIn);
  }

  EXPECT_EQ(NNodes,Graph2->GetNodes());
  EXPECT_EQ(NEdges,Graph2->GetEdges());
  EXPECT_FALSE(Graph2->Empty());
  EXPECT_TRUE(Graph2->IsOk());

  // Deleting one randomly chosen node per step empties the original graph.
  for (int Step = 0; Step < NNodes; Step++) {
    const int RndNId = Graph->GetRndNId();
    Graph->DelNode(RndNId);
  }

  EXPECT_EQ(0,Graph->GetNodes());
  EXPECT_EQ(0,Graph->GetEdges());

  EXPECT_TRUE(Graph->IsOk());
  EXPECT_TRUE(Graph->Empty());

  // Clr() empties the copy in one call.
  Graph1->Clr();

  EXPECT_EQ(0,Graph1->GetNodes());
  EXPECT_EQ(0,Graph1->GetEdges());

  EXPECT_TRUE(Graph1->IsOk());
  EXPECT_TRUE(Graph1->Empty());
}