int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Memetracker Converter. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()), 1, true); const TStr netinfInputFile = Env.GetIfArgPrefixStr( "-netinfInputFile=", "data/memetracker/InfoNet5000Q1000NEXP.txt", "File containing the inferred network, used as underlying structure"); const TStr clusteredCascadesInputFile = Env.GetIfArgPrefixStr( "-clusteredCascadesInputFile=", "data/memetracker/clust-qt08080902w3mfq5.txt", "File containing clustered instances of memes and their path on the web"); const TInt cascadeInputLine = Env.GetIfArgPrefixInt( "-cascadeInputLine=", 75926, "Specific cascade to read from clusteredCascadesInputFile"); const TStr snapNetworkOutputFile = Env.GetIfArgPrefixStr( "-snapNetworkOutputFile=", "data/memetracker/memeNetwork.dat", "File where to write the output network as a snap binary"); const TStr groundTruthOutputFile = Env.GetIfArgPrefixStr( "-groundTruthOutputFile=", "data/memetracker/memeGroundTruth.txt", "File where to write the ground truth"); // Build the network from the most popular websites. ifstream inputfile(netinfInputFile.CStr()); string line; getline(inputfile, line); // read header line from file map<string, unsigned int> urlToNodeIdHash; PUNGraph graph = PUNGraph::New(); unsigned int nodeId = 0; while (getline(inputfile, line)) { istringstream iss(line); string idx, src, dst; iss >> idx >> src >> dst; if (urlToNodeIdHash.find(src) == urlToNodeIdHash.end()) { urlToNodeIdHash[src] = nodeId; graph->AddNode(nodeId); nodeId++; } if (urlToNodeIdHash.find(dst) == urlToNodeIdHash.end()) { urlToNodeIdHash[dst] = nodeId; graph->AddNode(nodeId); nodeId++; } graph->AddEdge(urlToNodeIdHash[src], urlToNodeIdHash[dst]); } // Save network. { TFOut FOut(snapNetworkOutputFile); graph->Save(FOut); } // Read one memetracker entry. 
ifstream memetracker(clusteredCascadesInputFile.CStr()); for (int i = 0; i < cascadeInputLine; ++i) getline(memetracker, line); getline(memetracker, line); int entries, dummyInt; istringstream iss(line); iss >> dummyInt >> entries; cout << "Building cascade for "; while (!iss.eof()) { string phrase; iss >> phrase; cout << phrase << " "; } cout << endl; // Dump cascade to some file. ofstream dumpStream; dumpStream.open(groundTruthOutputFile.CStr()); string dummy, url; map<string, unsigned int> infectionTimeHash; unsigned int infectionTime = 0; for (int i = 0; i < entries; ++i) { // Read through each "infected" URL. getline(memetracker, line); istringstream iss(line); // These fields of the cascade entry ar not important. iss >> dummy >> dummy >> dummy >> dummy; iss >> url; // Parse the URL and identify the host website. uri::uri instance(url); assert(instance.is_valid()); // If node not in network or already infected, skip. if (urlToNodeIdHash.find(instance.host()) == urlToNodeIdHash.end() || infectionTimeHash.find(instance.host()) != infectionTimeHash.end()) continue; infectionTimeHash[instance.host()] = infectionTime++; // Dump as pair of <nodeId, infectionTime>. dumpStream << urlToNodeIdHash[instance.host()] << " " << infectionTimeHash[instance.host()] << endl; } return 0; }
// Test node, edge creation void ManipulateNodesEdges() { int NNodes = 10000; int NEdges = 100000; const char *FName = "test.graph"; PUNGraph Graph; PUNGraph Graph1; PUNGraph Graph2; int i; int n; int NCount; int ECount1; int ECount2; int x,y; bool t; Graph = TUNGraph::New(); t = Graph->Empty(); // create the nodes for (i = 0; i < NNodes; i++) { Graph->AddNode(i); } t = Graph->Empty(); n = Graph->GetNodes(); // create random edges NCount = NEdges; while (NCount > 0) { x = (long) (drand48() * NNodes); y = (long) (drand48() * NNodes); // Graph->GetEdges() is not correct for the loops (x == y), // skip the loops in this test if (x != y && !Graph->IsEdge(x,y)) { n = Graph->AddEdge(x, y); NCount--; } } PrintGStats("ManipulateNodesEdges:Graph",Graph); // get all the nodes NCount = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NCount++; } // get all the edges for all the nodes ECount1 = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { for (int e = 0; e < NI.GetOutDeg(); e++) { ECount1++; } } ECount1 /= 2; // get all the edges directly ECount2 = 0; for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { ECount2++; } printf("graph ManipulateNodesEdges:Graph, nodes %d, edges1 %d, edges2 %d\n", NCount, ECount1, ECount2); // assignment Graph1 = TUNGraph::New(); *Graph1 = *Graph; PrintGStats("ManipulateNodesEdges:Graph1",Graph1); // save the graph { TFOut FOut(FName); Graph->Save(FOut); FOut.Flush(); } // load the graph { TFIn FIn(FName); Graph2 = TUNGraph::Load(FIn); } PrintGStats("ManipulateNodesEdges:Graph2",Graph2); // remove all the nodes and edges for (i = 0; i < NNodes; i++) { n = Graph->GetRndNId(); Graph->DelNode(n); } PrintGStats("ManipulateNodesEdges:Graph",Graph); Graph1->Clr(); PrintGStats("ManipulateNodesEdges:Graph1",Graph1); }
// Checks TUNGraph node and edge handling: creation, membership queries, the
// node and edge iterators, degree accessors, assignment, a save/load round
// trip, and removal of all nodes.
TEST(TUNGraph, ManipulateNodesEdges) {
  const int NNodes = 10000;
  const int NEdges = 100000;
  const char* const GraphFNm = "test.graph";

  PUNGraph Graph = TUNGraph::New();
  EXPECT_EQ(1,Graph->Empty());

  // Add node ids 0 .. NNodes-1.
  for (int i = 0; i < NNodes; i++) {
    Graph->AddNode(i);
  }
  EXPECT_EQ(0,Graph->Empty());
  EXPECT_EQ(NNodes,Graph->GetNodes());

  // Keep drawing random node pairs until NEdges new edges were added.
  // Graph->GetEdges() is not correct for the loops (x == y),
  // so loops are skipped in this test, as are pairs already connected.
  for (int Remaining = NEdges; Remaining > 0; ) {
    const int SrcNId = static_cast<int>(drand48() * NNodes);
    const int DstNId = static_cast<int>(drand48() * NNodes);
    if (SrcNId == DstNId || Graph->IsEdge(SrcNId, DstNId)) {
      continue;
    }
    Graph->AddEdge(SrcNId, DstNId);
    Remaining--;
  }
  EXPECT_EQ(NEdges,Graph->GetEdges());
  EXPECT_EQ(0,Graph->Empty());
  EXPECT_EQ(1,Graph->IsOk());

  // Every added id is a node; ids at or beyond NNodes are not.
  for (int i = 0; i < NNodes; i++) {
    EXPECT_EQ(1,Graph->IsNode(i));
  }
  EXPECT_EQ(0,Graph->IsNode(NNodes));
  EXPECT_EQ(0,Graph->IsNode(NNodes+1));
  EXPECT_EQ(0,Graph->IsNode(2*NNodes));

  // Node iterator visits every node.
  {
    int NCount = 0;
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
      NCount++;
    }
    EXPECT_EQ(NNodes,NCount);
  }

  // Out-degrees summed over all nodes see each edge from both end points.
  {
    int NCount = 0;
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
      NCount += NI.GetOutDeg();
    }
    EXPECT_EQ(NEdges*2,NCount);
  }

  // Edge iterator visits every edge once.
  {
    int NCount = 0;
    for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
      NCount++;
    }
    EXPECT_EQ(NEdges,NCount);
  }

  // Degree, in-degree and out-degree agree for every node.
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    const int Deg = NI.GetDeg();
    const int InDeg = NI.GetInDeg();
    const int OutDeg = NI.GetOutDeg();
    EXPECT_EQ(Deg,InDeg);
    EXPECT_EQ(Deg,OutDeg);
  }

  // Assignment copies nodes and edges.
  PUNGraph Graph1 = TUNGraph::New();
  *Graph1 = *Graph;
  EXPECT_EQ(NNodes,Graph1->GetNodes());
  EXPECT_EQ(NEdges,Graph1->GetEdges());
  EXPECT_EQ(0,Graph1->Empty());
  EXPECT_EQ(1,Graph1->IsOk());

  // Save the graph, then load it into a separate object.
  {
    TFOut FOut(GraphFNm);
    Graph->Save(FOut);
    FOut.Flush();
  }
  PUNGraph Graph2;
  {
    TFIn FIn(GraphFNm);
    Graph2 = TUNGraph::Load(FIn);
  }
  EXPECT_EQ(NNodes,Graph2->GetNodes());
  EXPECT_EQ(NEdges,Graph2->GetEdges());
  EXPECT_EQ(0,Graph2->Empty());
  EXPECT_EQ(1,Graph2->IsOk());

  // Delete NNodes randomly chosen nodes, which must leave the graph empty.
  for (int i = 0; i < NNodes; i++) {
    Graph->DelNode(Graph->GetRndNId());
  }
  EXPECT_EQ(0,Graph->GetNodes());
  EXPECT_EQ(0,Graph->GetEdges());
  EXPECT_EQ(1,Graph->IsOk());
  EXPECT_EQ(1,Graph->Empty());

  // Clr() empties the copy in one call.
  Graph1->Clr();
  EXPECT_EQ(0,Graph1->GetNodes());
  EXPECT_EQ(0,Graph1->GetEdges());
  EXPECT_EQ(1,Graph1->IsOk());
  EXPECT_EQ(1,Graph1->Empty());
}