int main(int argc, char** argv){ TBool debug = false; TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags"; if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";} Schema TagS; TagS.Add(TPair<TStr,TAttrType>("UserId", atInt)); TagS.Add(TPair<TStr,TAttrType>("Tag", atStr)); float ft_max; float mu_max; timeval timer4; gettimeofday(&timer4, NULL); double t1 = timer4.tv_sec + (timer4.tv_usec/1000000.0); PExplicitStringTable ES_Tags = ExplicitStringTable::LoadSS(TagS, TagsFnm + ".tsv"); gettimeofday(&timer4, NULL); double t2 = timer4.tv_sec + (timer4.tv_usec/1000000.0); printf("Time to load tags table: %f\n", t2 - t1); printf("Table Size:\n"); ES_Tags->PrintSize(); getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); timeval timer6; gettimeofday(&timer6, NULL); t1 = timer6.tv_sec + (timer6.tv_usec/1000000.0); PExplicitStringTable ES_TagsJoinUser = ES_Tags->SelfJoin("UserId"); gettimeofday(&timer6, NULL); t2 = timer6.tv_sec + (timer6.tv_usec/1000000.0); printf("Time to join on user id column: %f\n", t2 - t1); printf("Table Size:\n"); ES_TagsJoinUser->PrintSize(); if(debug){ ES_TagsJoinUser->SaveSS(TagsFnm + "_join_user_es.tsv");} getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); timeval timer7; gettimeofday(&timer7, NULL); t1 = timer7.tv_sec + (timer7.tv_usec/1000000.0); PExplicitStringTable ES_JavaTags = ExplicitStringTable::New(TagS); TIntV SelectedRows1; if(debug){ ES_Tags->SelectAtomicConst(TStr("Tag"), TStr("c#"), EQ, SelectedRows1, ES_JavaTags, false, true); } else{ ES_Tags->SelectAtomicConst(TStr("Tag"), TStr("java"), EQ, SelectedRows1, ES_JavaTags, false, true); } gettimeofday(&timer7, NULL); t2 = timer7.tv_sec + (timer7.tv_usec/1000000.0); printf("Time to select java users: %f\n", t2 - t1); printf("Table Size:\n"); ES_JavaTags->PrintSize(); if(debug){ ES_JavaTags->SaveSS(TagsFnm + "_select_es.tsv");} getmaxcpumem(&ft_max, &mu_max); 
printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); return 0; }
int main(int argc, char* argv[]){ //test1(); TTableContext Context; // create scheme Schema PostS; PostS.Add(TPair<TStr,TAttrType>("Id", atInt)); PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt)); PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt)); PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr)); PostS.Add(TPair<TStr,TAttrType>("Score", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4); PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols); printf("Load done\n"); TStrV cols; cols.Add("OwnerUserId"); struct timeval begin, end; gettimeofday(&begin, NULL); P->Aggregate(cols, aaSum, "Score", "Sum"); gettimeofday(&end, NULL); double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec); printf("Elapsed time:%.3lfs\n", diff / 1000000); if (atoi(argv[1]) == 0) return 0; P->SaveSS("tests/p3.txt"); return 0; }
// Round-trips a table through a binary file: loads grades.txt, saves it with
// TTable::Save, reloads it through TShMIn / TTable::LoadShM and checks that row
// counts and two sample cells match.
TEST(SHMTest, LoadTables) {
  TStr Filename("test.graph");
  TTableContext Context;

  // Create schema.
  Schema GradeS;
  GradeS.Add(TPair<TStr,TAttrType>("A", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("B", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Quarter", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2011", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2012", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2013", atInt));

  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);
  RelevantCols.Add(4);
  RelevantCols.Add(5);

  PTable p1 = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols);

  {
    // Keep the writer in its own scope so it is destroyed before the file is
    // opened for reading below.
    // NOTE(review): assumes TFOut buffers writes and flushes/closes in its
    // destructor -- confirm against the TFOut implementation.
    TFOut OutStream(Filename);
    p1->Save(OutStream);
  }

  TShMIn Shmin(Filename);
  PTable p2 = TTable::LoadShM(Shmin, &Context);

  EXPECT_EQ(p1->GetNumRows().Val, p2->GetNumRows().Val);
  EXPECT_EQ(p1->GetNumValidRows().Val, p2->GetNumValidRows().Val);
  EXPECT_EQ(p1->GetIntVal("Grade 2011", 0).Val, p2->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(p1->GetIntVal("Grade 2013", 4).Val, p2->GetIntVal("Grade 2013", 4).Val);
}
int main(){ TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); // create table PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols); //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt"); T->Unique("Animal"); TTable Ts = *T; // did we fix problem with copy-c'tor ? //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols); //Ts->Unique(AnimalUnique); // test Select // create predicate tree: find all animals that are big and african or medium and Australian TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa"); TPredicate::TPredicateNode N1(A1); // Location == "Africa" TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big"); TPredicate::TPredicateNode N2(A2); // Size == "big" TPredicate::TPredicateNode N3(AND); N3.AddLeftChild(&N1); N3.AddRightChild(&N2); TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia"); TPredicate::TPredicateNode N4(A4); TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium"); TPredicate::TPredicateNode N5(A5); TPredicate::TPredicateNode N6(AND); N6.AddLeftChild(&N4); N6.AddRightChild(&N5); TPredicate::TPredicateNode N7(OR); N7.AddLeftChild(&N3); N7.AddRightChild(&N6); TPredicate Pred(&N7); TIntV SelectedRows; Ts.Select(Pred, SelectedRows); TStrV GroupBy; GroupBy.Add("Location"); T->Group(GroupBy, "LocationGroup"); GroupBy.Add("Size"); T->Group(GroupBy, "LocationSizeGroup"); T->Count("LocationCount", "Location"); PTable Tj = T->Join("Location", Ts, "Location"); TStrV UniqueAnimals; UniqueAnimals.Add("Animals_1.Animal"); UniqueAnimals.Add("Animals_2.Animal"); 
Tj->Unique(UniqueAnimals, false); //print table T->SaveSS("tests/animals_out_T.txt"); Ts.SaveSS("tests/animals_out_Ts.txt"); Tj->SaveSS("tests/animals_out_Tj.txt"); return 0; }
int main(){ TTableContext Context; // Case 1: Euclidean Distance Schema BuildingS; BuildingS.Add(TPair<TStr,TAttrType>("Building", atStr)); BuildingS.Add(TPair<TStr,TAttrType>("X", atInt)); BuildingS.Add(TPair<TStr,TAttrType>("Y", atInt)); // create table PTable TBuildings = TTable::LoadSS("Buildings", BuildingS, "tests/buildings.txt", Context, '\t', false); TStrV Cols; Cols.Add("X"); Cols.Add("Y"); // Find all buildings within 5 Euc Distance of each other. PTable BuildingJointTable = TBuildings->SelfSimJoin(Cols, "Euclidean_Distance", L2Norm, 5.0); BuildingJointTable->SaveSS("tests/buildings.out.txt"); // Case2 : Haversine distance Schema PlaceS; PlaceS.Add(TPair<TStr,TAttrType>("Name", atStr)); PlaceS.Add(TPair<TStr,TAttrType>("Location", atStr)); PlaceS.Add(TPair<TStr,TAttrType>("Latitude", atFlt)); PlaceS.Add(TPair<TStr,TAttrType>("Longitude", atFlt)); // create table PTable TPlaces = TTable::LoadSS("Places", PlaceS, "tests/places.txt", Context, '\t', false); Cols.Clr(); Cols.Add("Latitude"); Cols.Add("Longitude"); PTable PlacesJointTable = TPlaces->SelfSimJoin(Cols, "Distance",Haversine, 1000.0); TStrV ProjectionV; ProjectionV.Add("Places_1.Name"); ProjectionV.Add("Places_1.Location"); ProjectionV.Add("Places_2.Name"); ProjectionV.Add("Places_2.Location"); ProjectionV.Add("Distance"); PlacesJointTable->ProjectInPlace(ProjectionV); PlacesJointTable->SelectAtomic("Places_1.Name", "Places_2.Name", NEQ); PlacesJointTable->SaveSS("tests/places.out.txt"); printf("Saved buildings.out.txt and places.out.txt\n"); return 0; }
void test_ints(){ TTableContext context; Schema schema; schema.Add(TPair<TStr,TAttrType>("src",atInt)); schema.Add(TPair<TStr,TAttrType>("dst",atInt)); TStr wikifilename = "/dfs/scratch0/viswa/wiki_Vote.txt"; PTable wikitable = TTable::LoadSS(schema, wikifilename, &context, '\t', TBool(false)); printf("Loaded the table!\n"); PUNGraph pungraph = TSnap::ToGraph<PUNGraph>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst); printf("Made the TUNGraph of %d nodes and %d edges.\n",(*pungraph).GetNodes(),(*pungraph).GetEdges()); PNGraph pngraph = TSnap::ToGraph<PNGraph>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst); printf("Made the TNGraph of %d nodes and %d edges.\n",(*pngraph).GetNodes(),(*pngraph).GetEdges()); PNGraphMP pngraphmp = TSnap::ToGraphMP<PNGraphMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1()); printf("Made the TNGraphMP of %d nodes and %d edges.\n",(*pngraphmp).GetNodes(),(*pngraphmp).GetEdges()); // PNGraphMP pngraphmp_2 = TSnap::ToGraphMP3<PNGraphMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1()); // printf("Made the TNGraphMP of %d nodes and %d edges with MP2.\n",(*pngraphmp_2).GetNodes(),(*pngraphmp_2).GetEdges()); printf("Tested graph conversion with ints.\n"); TVec<TStr> emptyattrv; PNEANet pneanet = TSnap::ToNetwork<PNEANet>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),emptyattrv, emptyattrv,emptyattrv,aaFirst); printf("Made the PNEANet of %d nodes and %d edges.\n", (*pneanet).GetNodes(),(*pneanet).GetEdges()); PNEANetMP pneanetmp = TSnap::ToNetworkMP<PNEANetMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst); printf("Made the PNEANetMP of %d nodes and %d edges.\n", (*pneanetmp).GetNodes(),(*pneanet).GetEdges()); PNEANetMP pneanetmp2 = TSnap::ToNetworkMP2<PNEANetMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst); printf("Made the PNEANetMP of %d nodes and %d edges with MP2.\n", (*pneanetmp2).GetNodes(),(*pneanetmp).GetEdges()); printf("Tested network conversions with ints.\n"); }
//Function to read a table of nodes PTable AddNodeTable(TTableContext& Context) { Schema NodeScm; NodeScm.Add(TPair<TStr, TAttrType>("NodeID", atStr)); char FileName[50]; int ColCnt = 0; printf("Adding Node Table\n"); printf("Enter filename and number of columns (>= 1) \n"); scanf("%s %d", FileName, &ColCnt); for (TInt i = 1; i < ColCnt; i++) { TStr ColName = "Attribute" + i.GetStr(); NodeScm.Add(TPair<TStr, TAttrType>(ColName, atStr)); } TStr FName(FileName); PTable T = TTable::LoadSS(NodeScm, FName, Context); return T; }
int main(){ TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); PTable P = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols); P->SaveSS("tests/p1.txt"); TStrV cols; cols.Add("Size"); cols.Add("Number"); TVec<PTable> R = P->SpliceByGroup(cols); for (TInt i = 0; i < R.Len(); i++) { TStr fn = i.GetStr(); R[i]->SaveSS("tests/sznumber" + fn + ".txt"); } P->Unique(cols, true); P->SaveSS("tests/p2.txt"); TStrV group1; group1.Add("Location"); P->Group(group1, "LocationGroup"); P->SaveSS("tests/p3.txt"); return 0; }
// Function to read in a table of edges PTable AddEdgeTable(TTableContext& Context) { char FileName[200]; int ColCnt; int Reverse; printf("Adding Edge Table\n"); printf("Enter filename, number of columns (>= 2), and whether reverse? (reverse = 1, not reverse = 0\n"); scanf("%s %d %d", FileName, &ColCnt, &Reverse); Schema EdgeScm; if (Reverse == 1) { EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr)); EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr)); } else { EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr)); EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr)); } for (TInt i = 1; i < ColCnt-1; i++) { TStr ColName = "Attribute" + i.GetStr(); EdgeScm.Add(TPair<TStr, TAttrType>(ColName, atStr)); } TStr FName(FileName); PTable T = TTable::LoadSS(EdgeScm, FName, Context); return T; }
void test1() { TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); PTable P = TTable::LoadSS("Animals", AnimalS, "tests/s.txt", Context, RelevantCols); printf("Load done\n"); TStrV cols; cols.Add("Size"); cols.Add("Number"); struct timeval begin, end; gettimeofday(&begin, NULL); //P->Unique(cols); P->Group(cols, "SizeNumberGroup"); gettimeofday(&end, NULL); double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec); printf("Elapsed time:%.3lfs\n", diff / 1000000); P->SaveSS("tests/p3.txt"); }
// Tests parallel select in-place function. TEST(TTable, ParallelSelectInPlace) { TTableContext Context; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); T1->SelectAtomicIntConst("Src", 87, GT); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(303, T1->GetNumValidRows().Val); }
int main(int argc, char** argv){ TBool debug = false; TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags_200000"; if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";} Schema TagS; TagS.Add(TPair<TStr,TAttrType>("UserId", atInt)); TTableContext Context; TTable::SetMP(false); float ft_max; float mu_max; timeval timer0; gettimeofday(&timer0, NULL); double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0); PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context); gettimeofday(&timer0, NULL); double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0); printf("Time to load tags table: %f\n", t2 - t1); printf("Table Size:\n"); Tags->PrintSize(); Tags->PrintContextSize(); getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); timeval timer1; gettimeofday(&timer1, NULL); t1 = timer1.tv_sec + (timer1.tv_usec/1000000.0); PTable TagsJoinTag = Tags->SelfJoin("Tag"); gettimeofday(&timer1, NULL); t2 = timer1.tv_sec + (timer1.tv_usec/1000000.0); printf("Time to join on tags column: %f\n", t2 - t1); printf("Table Size:\n"); TagsJoinTag->PrintSize(); if(debug){ TagsJoinTag->SaveSS(TagsFnm + "_join_tag.tsv");} getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); return 0; }
// Tests parallel table to graph function. TEST(TTable, ToGraphMP) { TTableContext Context; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); TVec<TPair<TStr, TAttrType> > S = T1->GetSchema(); PNGraphMP Graph = TSnap::ToGraphMP<PNGraphMP>(T1, S[0].GetVal1(), S[1].GetVal1()); EXPECT_EQ(689,Graph->GetNodes()); EXPECT_EQ(499,Graph->GetEdges()); EXPECT_EQ(1,Graph->IsOk()); }
// Tests parallel select function. TEST(TTable, ParallelSelect) { TTableContext Context; // TODO: Change this to point to a local copy of the LiveJournal table binary. // char srcfile[100] = "/dfs/ilfs2/0/ringo/benchmarks/soc-LiveJournal1.table"; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); PTable T2 = TTable::New(T1->GetSchema(), &Context); T1->SelectAtomicIntConst("Src", 88, LT, T2); EXPECT_EQ(196, T2->GetNumRows().Val); EXPECT_EQ(196, T2->GetNumValidRows().Val); }
// Tests parallel join function. TEST(TTable, ParallelJoin) { TTableContext Context; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); PTable T2 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T2->GetNumRows().Val); EXPECT_EQ(499, T2->GetNumValidRows().Val); PTable P = T1->Join("Src", T2, "Dst"); EXPECT_EQ(24, P->GetNumRows().Val); EXPECT_EQ(24, P->GetNumValidRows().Val); }
// Tests load and save from text file. TEST(TTable, LoadSave) { TTableContext Context; // Create schema. Schema GradeS; GradeS.Add(TPair<TStr,TAttrType>("Class", atStr)); GradeS.Add(TPair<TStr,TAttrType>("Area", atStr)); GradeS.Add(TPair<TStr,TAttrType>("Quarter", atStr)); GradeS.Add(TPair<TStr,TAttrType>("Grade 2011", atInt)); GradeS.Add(TPair<TStr,TAttrType>("Grade 2012", atInt)); GradeS.Add(TPair<TStr,TAttrType>("Grade 2013", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4); RelevantCols.Add(5); PTable P = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols); EXPECT_EQ(5, P->GetNumRows().Val); EXPECT_EQ(5, P->GetNumValidRows().Val); EXPECT_EQ(7, P->GetIntVal("Grade 2011", 0).Val); EXPECT_EQ(9, P->GetIntVal("Grade 2013", 4).Val); EXPECT_STREQ("Compilers", P->GetStrVal("Class", 3).CStr()); P->SaveSS("table/p1.txt"); // Test SaveSS by loading the saved table and testing values again. GradeS.Add(TPair<TStr,TAttrType>("_id", atInt)); P = TTable::LoadSS(GradeS, "table/p1.txt", &Context, RelevantCols); EXPECT_EQ(5, P->GetNumRows().Val); EXPECT_EQ(5, P->GetNumValidRows().Val); EXPECT_EQ(7, P->GetIntVal("Grade 2011", 0).Val); EXPECT_EQ(9, P->GetIntVal("Grade 2013", 4).Val); EXPECT_STREQ("Compilers", P->GetStrVal("Class", 3).CStr()); }
void test_strs() { TTableContext context; Schema schema; schema.Add(TPair<TStr,TAttrType>("src",atStr)); schema.Add(TPair<TStr,TAttrType>("dst",atStr)); TStr wikifilename = "/dfs/scratch0/viswa/wiki_Vote.txt"; PTable wikitable = TTable::LoadSS(schema, wikifilename, &context, '\t', TBool(false)); TIntV rowidx1, rowidx2, rowidx3; double delta1, delta2, delta3; struct timeval start, end; printf("Str: Before indexing ...\n"); gettimeofday(&start, NULL); for (int i = 0 ; i < 100; i++) rowidx1 = wikitable->GetStrRowIdxByMap("src", 7096); gettimeofday(&end, NULL); delta1 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6; gettimeofday(&start, NULL); for (int i = 0 ; i < 100; i++) rowidx2 = wikitable->GetStrRowIdxByMap("src", 7114); gettimeofday(&end, NULL); delta2 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6; gettimeofday(&start, NULL); for (int i = 0 ; i < 100; i++) rowidx3 = wikitable->GetStrRowIdxByMap("src", 0); gettimeofday(&end, NULL); delta3 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6; printf("Time 1: %f\tTime 2: %f\t Time 3: %f\n", delta1, delta2, delta3); printf("%d %d\n", rowidx1[0], rowidx2[0]); printf("Row Idx3: "); for (int i = 0 ; i < rowidx3.Len() ; i++ ) { printf("%d ", rowidx3[i]); } printf("\n"); gettimeofday(&start, NULL); wikitable->RequestIndexStrMap("src"); gettimeofday(&end, NULL); delta1 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6; printf("Str: Time to index: %f\n", delta1); printf("Str: After indexing ...\n"); gettimeofday(&start, NULL); for (int i = 0 ; i < 100; i++) rowidx1 = wikitable->GetStrRowIdxByMap("src", 7096); gettimeofday(&end, NULL); delta1 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6; gettimeofday(&start, NULL); for (int i = 0 ; i < 100; i++) rowidx2 = wikitable->GetStrRowIdxByMap("src", 7114); gettimeofday(&end, NULL); delta2 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec 
- start.tv_usec)/1.e6; gettimeofday(&start, NULL); for (int i = 0 ; i < 100; i++) rowidx3 = wikitable->GetStrRowIdxByMap("src", 0); gettimeofday(&end, NULL); delta3 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6; printf("Time 1: %f\tTime 2: %f\t Time 3: %f\n", delta1, delta2, delta3); printf("%d %d\n", rowidx1[0], rowidx2[0]); printf("Row Idx3: "); for (int i = 0 ; i < rowidx3.Len() ; i++ ) { printf("%d ", rowidx3[i]); } printf("\n"); /* int i = 0; for (TRowIterator RowI = wikitable->BegRI(); RowI < wikitable->EndRI(); RowI++) { if (i > 100000) printf("%d %d %d\n", RowI.GetRowIdx(), RowI.GetStrMapByName("src"), RowI.GetStrMapByName("dst")); i++; // if (i > 10) break; } */ }
int main(int argc, char* argv[]) { TTableContext Context; Schema NetworkS; NetworkS.Add(TPair<TStr, TAttrType>("Year", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("Month", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("DayOfMonth", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("DayOfWeek", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("DepTime", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("CRSDepTime", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("ArrTime", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("CRSArrTime", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("UniqueCarrier", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("FlightNum", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("TailNum", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("ActualElapsedTime", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("CRSElapsedTime", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("AirTime", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("ArrDelay", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("DepDelay", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("Origin", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("Dest", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("Distance", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("TaxiIn", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("TaxiOut", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("Cancelled", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("CancellationCode", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("Diverted", atStr)); NetworkS.Add(TPair<TStr, TAttrType>("CarrierDelay", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("WeatherDelay", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("NASDelay", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("SecurityDelay", atInt)); NetworkS.Add(TPair<TStr, TAttrType>("LateAircraftDelay", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2);RelevantCols.Add(3); RelevantCols.Add(4); RelevantCols.Add(5); RelevantCols.Add(6); RelevantCols.Add(7); RelevantCols.Add(8); RelevantCols.Add(9); 
RelevantCols.Add(10); RelevantCols.Add(11); RelevantCols.Add(12); RelevantCols.Add(13); RelevantCols.Add(14); RelevantCols.Add(15); RelevantCols.Add(16); RelevantCols.Add(17); RelevantCols.Add(18); RelevantCols.Add(19); RelevantCols.Add(20); RelevantCols.Add(21); RelevantCols.Add(22); RelevantCols.Add(23); RelevantCols.Add(24); RelevantCols.Add(25); RelevantCols.Add(26); RelevantCols.Add(27); RelevantCols.Add(28); PTable P = TTable::LoadSS(NetworkS, "table/2007.csv", Context, RelevantCols, ',', false); TStrV SV; TStrV DV; TStrV VE; double start = omp_get_wtime(); PNSparseNet G = TSnap::ToNetwork<PNSparseNet>(P, TStr("Origin"), TStr("Dest"), SV, DV, VE, aaLast); double end = omp_get_wtime(); printf("Conversion time without attributes %f\n", (end-start)); start = omp_get_wtime(); TSnap::AddAttrTable<PNSparseNet>(P, G, TStr("Origin"), TStr("Dest"), SV, DV, VE, aaLast); end = omp_get_wtime(); printf("Conversion time with attributes %f\n", (end-start)); /*(PTable Table, PGraph& Graph, const TStr& SrcCol, const TStr& DstCol, TStrV& SrcAttrV, TStrV& DstAttrV, TStrV& EdgeAttrV, TAttrAggr AggrPolicy, TInt DefaultInt, TFlt DefaultFlt, TStr DefaultStr)*/ }
int main(int argc, char** argv) { TBool debug = false; TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags"; if(debug) { TagsFnm = "/lfs/madmax4/0/yonathan/tags_small"; } Schema TagS; TagS.Add(TPair<TStr,TAttrType>("UserId", atInt)); TagS.Add(TPair<TStr,TAttrType>("Tag", atStr)); // Load a TTable object and benchmark how long it takes to iterate over all of its records TTableContext Context; TTable::SetMP(false); //float ft0, ft1, ft2, ft3; //float mu0, mu1, mu2, mu3; float ft_max; float mu_max; //float tdiff1, tdiff2, dfiff3; //float mdiff1, mdiff2, mdiff3; //getcpumem(&ft0,&mu0); timeval timer0; gettimeofday(&timer0, NULL); double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0); PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context); gettimeofday(&timer0, NULL); double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0); printf("Time to load tags table: %f\n", t2 - t1); printf("Table Size:\n"); Tags->PrintSize(); Tags->PrintContextSize(); //getcpumem(&ft1,&mu1); //tdiff1 = ft1 - ft0; //mdiff1 = mu1 - mu0; //printf("time: %0.3f seconds, memory: %0.3f MB\n", tdiff1, mdiff1); getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); timeval timer2; gettimeofday(&timer2, NULL); t1 = timer2.tv_sec + (timer2.tv_usec/1000000.0); printf("start join on user\n"); PTable TagsJoinUser = Tags->SelfJoin("UserId"); printf("finish join on user\n"); gettimeofday(&timer2, NULL); t2 = timer2.tv_sec + (timer2.tv_usec/1000000.0); printf("Time to join on user id column: %f\n", t2 - t1); printf("Table Size:\n"); TagsJoinUser->PrintSize(); //getcpumem(&ft2,&mu2); if(debug) { TagsJoinUser->SaveSS(TagsFnm + "_join_user.tsv"); } //tdiff2 = ft2 - ft1; //mdiff2 = mu2 - mu1; //printf("time: %0.3f seconds, memory: %0.3f MB\n", tdiff2, mdiff2); getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); timeval timer3; gettimeofday(&timer3, NULL); t1 = timer3.tv_sec + 
(timer3.tv_usec/1000000.0); PTable JavaTags = TTable::New(TagS, Context); TIntV SelectedRows; if(debug) { Tags->SelectAtomicConst(TStr("Tag"), TStr("c#"), EQ, SelectedRows, JavaTags, false, true); } else { Tags->SelectAtomicConst(TStr("Tag"), TStr("java"), EQ, SelectedRows, JavaTags, false, true); } gettimeofday(&timer3, NULL); t2 = timer3.tv_sec + (timer3.tv_usec/1000000.0); printf("Time to select java users: %f\n", t2 - t1); printf("Table Size:\n"); JavaTags->PrintSize(); //getcpumem(&ft3,&mu3); if(debug) { JavaTags->SaveSS(TagsFnm + "_select.tsv"); } //tdiff3 = ft3 - ft2; //mdiff3 = mu3 - mu2; //printf("time: %0.3f seconds, memory: %0.3f MB\n", tdiff3, mdiff3); getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); return 0; }
/** * Used for benchmarking sorting by source algorithm. * Takes as input starting point of * a top cascade and outputs time taken for casacade detection. * Input : Source, Dest, Start, Duration * Output : Prints the time for cascade detection */ int main(int argc,char* argv[]) { TTableContext Context; Schema TimeS; TimeS.Add(TPair<TStr,TAttrType>("Source",atInt)); TimeS.Add(TPair<TStr,TAttrType>("Dest",atInt)); TimeS.Add(TPair<TStr,TAttrType>("Start",atInt)); TimeS.Add(TPair<TStr,TAttrType>("Duration",atInt)); PTable P1 = TTable::LoadSS(TimeS,"./../../../../datasets/temporal/yemen_call_201001.txt",&Context,' '); TIntV MapV; TStrV SortBy; SortBy.Add("Source"); P1->Order(SortBy); TIntV Source; // Sorted vec of start time P1->ReadIntCol("Source",Source); for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) { MapV.Add(RI.GetRowIdx()); } // Attribute to Int mapping TInt SIdx = P1->GetColIdx("Source"); TInt DIdx = P1->GetColIdx("Dest"); TInt StIdx = P1->GetColIdx("Start"); TInt DuIdx = P1->GetColIdx("Duration"); int W = atoi(argv[1]); int len = 0; // Find the starting point int TSource = atoi(argv[2]); int TDest = atoi(argv[3]); int TStart = atoi(argv[4]); int TDur = atoi(argv[5]); TInt RIdx; for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) { RIdx = RI.GetRowIdx(); int RSource = P1->GetIntValAtRowIdx(SIdx,RIdx).Val; int RDest = P1->GetIntValAtRowIdx(DIdx,RIdx).Val; int RStart = P1->GetIntValAtRowIdx(StIdx,RIdx).Val; int RDur = P1->GetIntValAtRowIdx(DuIdx,RIdx).Val; if (TSource == RSource && TDest == RDest && TStart == RStart && TDur == RDur) break; } // Start building the cascade from the start point clock_t st,et; st = clock(); for (int i = 0; i < 1; i++) { THashSet<TInt> VisitedH; TSnapQueue<TInt> EventQ; EventQ.Push(RIdx); VisitedH.AddKey(RIdx); while (!EventQ.Empty()) { TInt CIdx = EventQ.Top(); EventQ.Pop(); int CDest = P1->GetIntValAtRowIdx(DIdx,CIdx).Val; int CStart = P1->GetIntValAtRowIdx(StIdx,CIdx).Val; int CDur = 
P1->GetIntValAtRowIdx(DuIdx,CIdx).Val; // In line binary search int val = CDest; int lo = 0; int hi = Source.Len() - 1; int index = -1; while (hi >= lo) { int mid = lo + (hi - lo)/2; if (Source.GetVal(mid) > val) { hi = mid - 1;} else if (Source.GetVal(mid) < val) { lo = mid + 1;} else { index = mid; hi = mid - 1;} } // End of binary search int BIdx = index; for(int i = BIdx; i < Source.Len(); i++) { int PId = MapV.GetVal(i).Val; if (! VisitedH.IsKey(PId)) { int TSource = P1->GetIntValAtRowIdx(SIdx,PId).Val; int TStart = P1->GetIntValAtRowIdx(StIdx,PId).Val; if (TSource != CDest) { break; } if (TStart >= (CDur + CStart) && TStart - (CDur + CStart) <= W) { VisitedH.AddKey(PId); EventQ.Push(PId); } } } } len = VisitedH.Len(); } et = clock(); float diff = ((float) et - (float) st)/CLOCKS_PER_SEC; printf("Size %d,Time %f\n",len,diff); return 0; }