int main(int argc, char** argv){
  TBool debug = false;
  TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags";
  if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";}
  Schema TagS; 
  TagS.Add(TPair<TStr,TAttrType>("UserId", atInt));
  TagS.Add(TPair<TStr,TAttrType>("Tag", atStr));
	
  float ft_max;
  float mu_max;

  timeval timer4;
  gettimeofday(&timer4, NULL);
  double t1 = timer4.tv_sec + (timer4.tv_usec/1000000.0);
  PExplicitStringTable ES_Tags = ExplicitStringTable::LoadSS(TagS, TagsFnm + ".tsv");
  gettimeofday(&timer4, NULL);
  double t2 = timer4.tv_sec + (timer4.tv_usec/1000000.0);
  printf("Time to load tags table: %f\n", t2 - t1);
  printf("Table Size:\n");
  ES_Tags->PrintSize();
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  timeval timer6;
  gettimeofday(&timer6, NULL);
  t1 = timer6.tv_sec + (timer6.tv_usec/1000000.0);
  PExplicitStringTable ES_TagsJoinUser = ES_Tags->SelfJoin("UserId");
  gettimeofday(&timer6, NULL);
  t2 = timer6.tv_sec + (timer6.tv_usec/1000000.0);
  printf("Time to join on user id column: %f\n", t2 - t1);
  printf("Table Size:\n");
  ES_TagsJoinUser->PrintSize();
  if(debug){ ES_TagsJoinUser->SaveSS(TagsFnm + "_join_user_es.tsv");}
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  timeval timer7;
  gettimeofday(&timer7, NULL);
  t1 = timer7.tv_sec + (timer7.tv_usec/1000000.0);
  PExplicitStringTable ES_JavaTags = ExplicitStringTable::New(TagS);
  TIntV SelectedRows1;
  if(debug){
  	ES_Tags->SelectAtomicConst(TStr("Tag"), TStr("c#"), EQ, SelectedRows1, ES_JavaTags, false, true);
  } else{
  	ES_Tags->SelectAtomicConst(TStr("Tag"), TStr("java"), EQ, SelectedRows1, ES_JavaTags, false, true);
  }
  gettimeofday(&timer7, NULL);
  t2 = timer7.tv_sec + (timer7.tv_usec/1000000.0);
  printf("Time to select java users: %f\n", t2 - t1);
  printf("Table Size:\n");
  ES_JavaTags->PrintSize();
  if(debug){ ES_JavaTags->SaveSS(TagsFnm + "_select_es.tsv");}
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  return 0;
}
Example #2
0
// Loads the StackOverflow posts table, times a sum aggregation of "Score"
// grouped by "OwnerUserId" (result column "Sum"), and optionally saves the
// table afterwards.
//
// argv[1]: integer flag. When it parses to 0, or when it is missing, the
//          program exits after printing the timing; otherwise the table is
//          written to "tests/p3.txt".
int main(int argc, char* argv[]){
  //test1();
  TTableContext Context;

  // create scheme
  Schema PostS;
  PostS.Add(TPair<TStr,TAttrType>("Id", atInt));
  PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr));
  PostS.Add(TPair<TStr,TAttrType>("Score", atInt));

  // All five schema columns are passed as relevant columns.
  TIntV RelevantCols;
  for (int Col = 0; Col < 5; Col++) { RelevantCols.Add(Col); }

  PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("OwnerUserId");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);
  P->Aggregate(cols, aaSum, "Score", "Sum");
  gettimeofday(&end, NULL);

  // Subtract before scaling so the microsecond count is computed from small
  // differences rather than from two large absolute timestamps.
  const double diff = (end.tv_sec - begin.tv_sec) * 1000000.0 + (end.tv_usec - begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  // argv[1] only exists when argc >= 2; a missing flag is treated like "0".
  if (argc < 2 || atoi(argv[1]) == 0) return 0;

  P->SaveSS("tests/p3.txt");

  return 0;
}
Example #3
0
// Round-trips a table through a binary file: the grades table is loaded from
// text, saved to "test.graph", read back through TShMIn / TTable::LoadShM,
// and the two tables are compared on row counts and two integer cells.
TEST(SHMTest, LoadTables) {
  TTableContext Context;

  // Schema: three string columns followed by three integer columns.
  Schema GradeS;
  GradeS.Add(TPair<TStr,TAttrType>("A", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("B", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Quarter", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2011", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2012", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2013", atInt));

  // Column indices 0..5, i.e. every column of the schema.
  TIntV RelevantCols;
  for (int Col = 0; Col < 6; Col++) {
    RelevantCols.Add(Col);
  }
  PTable Saved = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols);

  // Write the table out in binary form.
  // NOTE(review): OutStream is still alive when the file is reopened below;
  // confirm that Save() leaves the file fully written at that point.
  TStr Filename("test.graph");
  TFOut OutStream(Filename);
  Saved->Save(OutStream);

  // Read it back through the shared-memory input stream.
  TShMIn Shmin(Filename);
  PTable Reloaded = TTable::LoadShM(Shmin, &Context);

  EXPECT_EQ(Saved->GetNumRows().Val, Reloaded->GetNumRows().Val);
  EXPECT_EQ(Saved->GetNumValidRows().Val, Reloaded->GetNumValidRows().Val);
  EXPECT_EQ(Saved->GetIntVal("Grade 2011", 0).Val, Reloaded->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(Saved->GetIntVal("Grade 2013", 4).Val, Reloaded->GetIntVal("Grade 2013", 4).Val);
}
Example #4
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  // create table
  PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);
  //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
  T->Unique("Animal");
  TTable Ts = *T;  // did we fix problem with copy-c'tor ?
  //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols);
  //Ts->Unique(AnimalUnique);

  // test Select
  // create predicate tree: find all animals that are big and african or medium and Australian
  TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa");  
  TPredicate::TPredicateNode N1(A1);  // Location == "Africa"
  TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big");  
  TPredicate::TPredicateNode N2(A2);  // Size == "big"
  TPredicate::TPredicateNode N3(AND);
  N3.AddLeftChild(&N1);
  N3.AddRightChild(&N2);
  TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia");  
  TPredicate::TPredicateNode N4(A4);  
  TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium");  
  TPredicate::TPredicateNode N5(A5); 
  TPredicate::TPredicateNode N6(AND);
  N6.AddLeftChild(&N4);
  N6.AddRightChild(&N5);
  TPredicate::TPredicateNode N7(OR);
  N7.AddLeftChild(&N3);
  N7.AddRightChild(&N6);
  TPredicate Pred(&N7);
  TIntV SelectedRows;
  Ts.Select(Pred, SelectedRows);

  TStrV GroupBy;
  GroupBy.Add("Location");
  T->Group(GroupBy, "LocationGroup");
  GroupBy.Add("Size");
  T->Group(GroupBy, "LocationSizeGroup");
  T->Count("LocationCount", "Location");
  PTable Tj = T->Join("Location", Ts, "Location");
  TStrV UniqueAnimals;
  UniqueAnimals.Add("Animals_1.Animal");
  UniqueAnimals.Add("Animals_2.Animal");
  Tj->Unique(UniqueAnimals, false);
  //print table
   T->SaveSS("tests/animals_out_T.txt");
   Ts.SaveSS("tests/animals_out_Ts.txt");
   Tj->SaveSS("tests/animals_out_Tj.txt");
  return 0;
}
Example #5
0
int main(){
  TTableContext Context;
  // Case 1: Euclidean Distance
  Schema BuildingS;
  BuildingS.Add(TPair<TStr,TAttrType>("Building", atStr));
  BuildingS.Add(TPair<TStr,TAttrType>("X", atInt));
  BuildingS.Add(TPair<TStr,TAttrType>("Y", atInt));

  // create table
  PTable TBuildings = TTable::LoadSS("Buildings", BuildingS, "tests/buildings.txt", Context, '\t', false);

	TStrV Cols;
	Cols.Add("X");
	Cols.Add("Y");

	// Find all buildings within 5 Euc Distance of each other.
	PTable BuildingJointTable = TBuildings->SelfSimJoin(Cols, "Euclidean_Distance", L2Norm, 5.0);
	BuildingJointTable->SaveSS("tests/buildings.out.txt");

  // Case2 : Haversine distance 
  Schema PlaceS;
  PlaceS.Add(TPair<TStr,TAttrType>("Name", atStr));
  PlaceS.Add(TPair<TStr,TAttrType>("Location", atStr));
  PlaceS.Add(TPair<TStr,TAttrType>("Latitude", atFlt));
  PlaceS.Add(TPair<TStr,TAttrType>("Longitude", atFlt));

  // create table
  PTable TPlaces = TTable::LoadSS("Places", PlaceS, "tests/places.txt", Context, '\t', false);

	Cols.Clr();
	Cols.Add("Latitude");
	Cols.Add("Longitude");

	PTable PlacesJointTable = TPlaces->SelfSimJoin(Cols, "Distance",Haversine, 1000.0);

	TStrV ProjectionV;
	ProjectionV.Add("Places_1.Name");
	ProjectionV.Add("Places_1.Location");	
	ProjectionV.Add("Places_2.Name");
	ProjectionV.Add("Places_2.Location");
	ProjectionV.Add("Distance");
	PlacesJointTable->ProjectInPlace(ProjectionV);
	PlacesJointTable->SelectAtomic("Places_1.Name", "Places_2.Name", NEQ);
	PlacesJointTable->SaveSS("tests/places.out.txt");

	printf("Saved buildings.out.txt and places.out.txt\n");
  return 0;
}
Example #6
0
void test_ints(){
  TTableContext context;
  Schema schema;
  schema.Add(TPair<TStr,TAttrType>("src",atInt));
  schema.Add(TPair<TStr,TAttrType>("dst",atInt));
  TStr wikifilename = "/dfs/scratch0/viswa/wiki_Vote.txt";

  PTable wikitable = TTable::LoadSS(schema, wikifilename, &context, '\t', TBool(false));
  printf("Loaded the table!\n");
  PUNGraph pungraph = TSnap::ToGraph<PUNGraph>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst);
  printf("Made the TUNGraph of %d nodes and %d edges.\n",(*pungraph).GetNodes(),(*pungraph).GetEdges());
  PNGraph pngraph = TSnap::ToGraph<PNGraph>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst);
  printf("Made the TNGraph of %d nodes and %d edges.\n",(*pngraph).GetNodes(),(*pngraph).GetEdges());
  PNGraphMP pngraphmp = TSnap::ToGraphMP<PNGraphMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1());
  printf("Made the TNGraphMP of %d nodes and %d edges.\n",(*pngraphmp).GetNodes(),(*pngraphmp).GetEdges());

//  PNGraphMP pngraphmp_2 = TSnap::ToGraphMP3<PNGraphMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1());
//  printf("Made the TNGraphMP of %d nodes and %d edges with MP2.\n",(*pngraphmp_2).GetNodes(),(*pngraphmp_2).GetEdges());

  printf("Tested graph conversion with ints.\n");

  TVec<TStr> emptyattrv;
  PNEANet pneanet = TSnap::ToNetwork<PNEANet>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),emptyattrv, emptyattrv,emptyattrv,aaFirst);
  printf("Made the PNEANet of %d nodes and %d edges.\n", (*pneanet).GetNodes(),(*pneanet).GetEdges());

  PNEANetMP pneanetmp = TSnap::ToNetworkMP<PNEANetMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst);
  printf("Made the PNEANetMP of %d nodes and %d edges.\n", (*pneanetmp).GetNodes(),(*pneanet).GetEdges());

  PNEANetMP pneanetmp2 = TSnap::ToNetworkMP2<PNEANetMP>(wikitable,schema[0].GetVal1(),schema[1].GetVal1(),aaFirst);
  printf("Made the PNEANetMP of %d nodes and %d edges with MP2.\n", (*pneanetmp2).GetNodes(),(*pneanetmp).GetEdges());

  printf("Tested network conversions with ints.\n");

  
}
Example #7
0
// Function to read a table of nodes.
//
// Prompts on stdout, then reads a file name and a column count from stdin.
// The schema always starts with a string column "NodeID"; for every i with
// 1 <= i < ColCnt a further string column "Attribute<i>" is appended.
//
// Context: table context forwarded to TTable::LoadSS.
// Returns the table produced by TTable::LoadSS for the entered file name.
//
// The file name is limited to 49 characters (the buffer size minus the
// terminator). If the input cannot be parsed, a message is written to stderr
// and loading proceeds with whatever was read (empty name / zero columns
// by default).
PTable AddNodeTable(TTableContext& Context) {
  Schema NodeScm;
  NodeScm.Add(TPair<TStr, TAttrType>("NodeID", atStr));
  char FileName[50] = "";  // zero-filled so a failed read still leaves a valid string
  int ColCnt = 0;
  printf("Adding Node Table\n");
  printf("Enter filename and number of columns (>= 1) \n");
  // The field width keeps scanf from writing past the end of FileName.
  if (scanf("%49s %d", FileName, &ColCnt) != 2) {
    fprintf(stderr, "AddNodeTable: could not read filename and column count\n");
  }
  for (TInt i = 1; i < ColCnt; i++) {
    TStr ColName = "Attribute" + i.GetStr();
    NodeScm.Add(TPair<TStr, TAttrType>(ColName, atStr));
  }
  TStr FName(FileName);
  PTable T = TTable::LoadSS(NodeScm, FName, Context);
  return T;
}
Example #8
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);

  PTable P = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);

  P->SaveSS("tests/p1.txt");

  TStrV cols;
  cols.Add("Size");
  cols.Add("Number");

  TVec<PTable> R = P->SpliceByGroup(cols);
  for (TInt i = 0; i < R.Len(); i++) {
    TStr fn = i.GetStr();
    R[i]->SaveSS("tests/sznumber" + fn + ".txt");
  }

  P->Unique(cols, true);

  P->SaveSS("tests/p2.txt");

  TStrV group1;
  group1.Add("Location");
  P->Group(group1, "LocationGroup");

  P->SaveSS("tests/p3.txt");

  return 0;
}
Example #9
0
// Function to read in a table of edges.
//
// Prompts on stdout, then reads a file name, a column count, and a reverse
// flag from stdin. The first two schema columns are "SrcID" then "DstID",
// or "DstID" then "SrcID" when the reverse flag is 1. For every i with
// 1 <= i < ColCnt-1 a string column "Attribute<i>" is appended. All columns
// are strings.
//
// Context: table context forwarded to TTable::LoadSS.
// Returns the table produced by TTable::LoadSS for the entered file name.
//
// The file name is limited to 199 characters (the buffer size minus the
// terminator). If the input cannot be parsed, a message is written to stderr
// and loading proceeds with whatever was read (empty name, zero columns,
// not reversed by default).
PTable AddEdgeTable(TTableContext& Context) {
  char FileName[200] = "";  // zero-filled so a failed read still leaves a valid string
  int ColCnt = 0;
  int Reverse = 0;
  printf("Adding Edge Table\n");
  printf("Enter filename, number of columns (>= 2), and whether reverse? (reverse = 1, not reverse = 0\n");
  // The field width keeps scanf from writing past the end of FileName.
  if (scanf("%199s %d %d", FileName, &ColCnt, &Reverse) != 3) {
    fprintf(stderr, "AddEdgeTable: could not read filename, column count and reverse flag\n");
  }
  Schema EdgeScm;
  if (Reverse == 1) {
    EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr));
    EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr));
  }
  else {
    EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr));
    EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr));
  }
  // The two endpoint columns are already in the schema, hence ColCnt-1.
  for (TInt i = 1; i < ColCnt-1; i++) {
    TStr ColName = "Attribute" + i.GetStr();
    EdgeScm.Add(TPair<TStr, TAttrType>(ColName, atStr));
  }
  TStr FName(FileName);
  PTable T = TTable::LoadSS(EdgeScm, FName, Context);
  return T;
}
Example #10
0
void test1() {
  TTableContext Context;

  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);

  PTable P = TTable::LoadSS("Animals", AnimalS, "tests/s.txt", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("Size");
  cols.Add("Number");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);

  //P->Unique(cols);
  P->Group(cols, "SizeNumberGroup");

  gettimeofday(&end, NULL);

  double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  P->SaveSS("tests/p3.txt");

}
Example #11
0
// Tests the in-place select: after filtering on Src > 87 the table still
// reports all 499 rows, but only 303 of them remain valid.
TEST(TTable, ParallelSelectInPlace) {
  TTableContext Context;

  // Two integer columns describing an edge list.
  Schema LJS;
  LJS.Add(TPair<TStr,TAttrType>("Src", atInt));
  LJS.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV RelevantCols;
  for (int Col = 0; Col < 2; Col++) {
    RelevantCols.Add(Col);
  }

  PTable Edges = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);

  // Freshly loaded: every row is valid.
  EXPECT_EQ(499, Edges->GetNumRows().Val);
  EXPECT_EQ(499, Edges->GetNumValidRows().Val);

  Edges->SelectAtomicIntConst("Src", 87, GT);

  // Row count is unchanged; only the valid-row count drops.
  EXPECT_EQ(499, Edges->GetNumRows().Val);
  EXPECT_EQ(303, Edges->GetNumValidRows().Val);
}
Example #12
0
int main(int argc, char** argv){
  TBool debug = false;
  TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags_200000";
  if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";}
  Schema TagS; 
  TagS.Add(TPair<TStr,TAttrType>("UserId", atInt));
  TTableContext Context;
  TTable::SetMP(false);
	
  float ft_max;
  float mu_max;
	
  timeval timer0;
  gettimeofday(&timer0, NULL);
  double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
  PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context);
  gettimeofday(&timer0, NULL);
  double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
  printf("Time to load tags table: %f\n", t2 - t1);
  printf("Table Size:\n");
  Tags->PrintSize();
  Tags->PrintContextSize();
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  timeval timer1;
  gettimeofday(&timer1, NULL);
  t1 = timer1.tv_sec + (timer1.tv_usec/1000000.0);
  PTable TagsJoinTag = Tags->SelfJoin("Tag");
  gettimeofday(&timer1, NULL);
  t2 = timer1.tv_sec + (timer1.tv_usec/1000000.0);
  printf("Time to join on tags column: %f\n", t2 - t1);
  printf("Table Size:\n");
  TagsJoinTag->PrintSize();
  if(debug){ TagsJoinTag->SaveSS(TagsFnm + "_join_tag.tsv");}
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  return 0;
}
Example #13
0
// Tests the parallel table-to-graph conversion: the 499-row edge table must
// produce a graph with 689 nodes and 499 edges that passes IsOk().
TEST(TTable, ToGraphMP) {
  TTableContext Context;

  // Two integer columns describing an edge list.
  Schema LJS;
  LJS.Add(TPair<TStr,TAttrType>("Src", atInt));
  LJS.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV RelevantCols;
  for (int Col = 0; Col < 2; Col++) {
    RelevantCols.Add(Col);
  }

  PTable Edges = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);

  EXPECT_EQ(499, Edges->GetNumRows().Val);
  EXPECT_EQ(499, Edges->GetNumValidRows().Val);

  // Use the first two schema columns of the loaded table as source and
  // destination.
  TVec<TPair<TStr, TAttrType> > TableSchema = Edges->GetSchema();
  PNGraphMP Graph = TSnap::ToGraphMP<PNGraphMP>(Edges, TableSchema[0].GetVal1(), TableSchema[1].GetVal1());

  EXPECT_EQ(689,Graph->GetNodes());
  EXPECT_EQ(499,Graph->GetEdges());
  EXPECT_EQ(1,Graph->IsOk());
}
Example #14
0
// Tests the select that writes into a second table: filtering on Src < 88
// must leave 196 rows (all valid) in the destination table.
TEST(TTable, ParallelSelect) {
  TTableContext Context;

  // TODO: Change this to point to a local copy of the LiveJournal table binary.
  // char srcfile[100] = "/dfs/ilfs2/0/ringo/benchmarks/soc-LiveJournal1.table";

  // Two integer columns describing an edge list.
  Schema LJS;
  LJS.Add(TPair<TStr,TAttrType>("Src", atInt));
  LJS.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV RelevantCols;
  for (int Col = 0; Col < 2; Col++) {
    RelevantCols.Add(Col);
  }

  PTable Edges = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);

  EXPECT_EQ(499, Edges->GetNumRows().Val);
  EXPECT_EQ(499, Edges->GetNumValidRows().Val);

  // Destination table shares the source schema and context.
  PTable Selected = TTable::New(Edges->GetSchema(), &Context);
  Edges->SelectAtomicIntConst("Src", 88, LT, Selected);

  EXPECT_EQ(196, Selected->GetNumRows().Val);
  EXPECT_EQ(196, Selected->GetNumValidRows().Val);
}
Example #15
0
// Tests the join: the same edge file is loaded twice and joined on
// left.Src == right.Dst, which must yield 24 rows (all valid).
TEST(TTable, ParallelJoin) {
  TTableContext Context;

  // Two integer columns describing an edge list.
  Schema LJS;
  LJS.Add(TPair<TStr,TAttrType>("Src", atInt));
  LJS.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV RelevantCols;
  for (int Col = 0; Col < 2; Col++) {
    RelevantCols.Add(Col);
  }

  // Left side of the join.
  PTable Left = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);

  EXPECT_EQ(499, Left->GetNumRows().Val);
  EXPECT_EQ(499, Left->GetNumValidRows().Val);

  // Right side of the join, loaded from the same file.
  PTable Right = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);

  EXPECT_EQ(499, Right->GetNumRows().Val);
  EXPECT_EQ(499, Right->GetNumValidRows().Val);

  PTable Joined = Left->Join("Src", Right, "Dst");

  EXPECT_EQ(24, Joined->GetNumRows().Val);
  EXPECT_EQ(24, Joined->GetNumValidRows().Val);
}
Example #16
0
// Tests text load and save: the grades table is loaded and spot-checked,
// saved with SaveSS, loaded again from the saved file, and spot-checked
// with the same expectations.
TEST(TTable, LoadSave) {
  TTableContext Context;

  // Schema: three string columns followed by three integer columns.
  Schema GradeS;
  GradeS.Add(TPair<TStr,TAttrType>("Class", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Area", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Quarter", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2011", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2012", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2013", atInt));

  // Column indices 0..5; the same vector is reused for the second load.
  TIntV RelevantCols;
  for (int Col = 0; Col < 6; Col++) {
    RelevantCols.Add(Col);
  }

  PTable Grades = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols);

  EXPECT_EQ(5, Grades->GetNumRows().Val);
  EXPECT_EQ(5, Grades->GetNumValidRows().Val);

  EXPECT_EQ(7, Grades->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(9, Grades->GetIntVal("Grade 2013", 4).Val);
  EXPECT_STREQ("Compilers", Grades->GetStrVal("Class", 3).CStr());

  Grades->SaveSS("table/p1.txt");

  // Test SaveSS by loading the saved table and testing values again. The
  // schema gains an integer "_id" column before the reload.
  GradeS.Add(TPair<TStr,TAttrType>("_id", atInt));
  Grades = TTable::LoadSS(GradeS, "table/p1.txt", &Context, RelevantCols);

  EXPECT_EQ(5, Grades->GetNumRows().Val);
  EXPECT_EQ(5, Grades->GetNumValidRows().Val);

  EXPECT_EQ(7, Grades->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(9, Grades->GetIntVal("Grade 2013", 4).Val);
  EXPECT_STREQ("Compilers", Grades->GetStrVal("Class", 3).CStr());
}
Example #17
0
void test_strs() {

  TTableContext context;
  Schema schema;
  schema.Add(TPair<TStr,TAttrType>("src",atStr));
  schema.Add(TPair<TStr,TAttrType>("dst",atStr));
  TStr wikifilename = "/dfs/scratch0/viswa/wiki_Vote.txt";

  PTable wikitable = TTable::LoadSS(schema, wikifilename, &context, '\t', TBool(false));

  TIntV rowidx1, rowidx2, rowidx3;
  double delta1, delta2, delta3;
  struct timeval start, end;
  printf("Str: Before indexing ...\n");
  gettimeofday(&start, NULL);
  for (int i = 0 ; i < 100; i++) rowidx1 = wikitable->GetStrRowIdxByMap("src", 7096);
  gettimeofday(&end, NULL);
  delta1 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;

  gettimeofday(&start, NULL);
  for (int i = 0 ; i < 100; i++) rowidx2 = wikitable->GetStrRowIdxByMap("src", 7114);
  gettimeofday(&end, NULL);
  delta2 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;

  gettimeofday(&start, NULL);
  for (int i = 0 ; i < 100; i++) rowidx3 = wikitable->GetStrRowIdxByMap("src", 0);
  gettimeofday(&end, NULL);
  delta3 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;


  printf("Time 1: %f\tTime 2: %f\t Time 3: %f\n", delta1, delta2, delta3);
  printf("%d %d\n", rowidx1[0], rowidx2[0]);

  printf("Row Idx3: ");
  for (int i = 0 ; i < rowidx3.Len() ; i++ ) {
    printf("%d ", rowidx3[i]);
  }
  printf("\n");

  gettimeofday(&start, NULL);
  wikitable->RequestIndexStrMap("src");
  gettimeofday(&end, NULL);
  delta1 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;
  printf("Str: Time to index: %f\n", delta1);


  printf("Str: After indexing ...\n");
  gettimeofday(&start, NULL);
  for (int i = 0 ; i < 100; i++) rowidx1 = wikitable->GetStrRowIdxByMap("src", 7096);
  gettimeofday(&end, NULL);
  delta1 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;

  gettimeofday(&start, NULL);
  for (int i = 0 ; i < 100; i++) rowidx2 = wikitable->GetStrRowIdxByMap("src", 7114);
  gettimeofday(&end, NULL);
  delta2 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;

  gettimeofday(&start, NULL);
  for (int i = 0 ; i < 100; i++) rowidx3 = wikitable->GetStrRowIdxByMap("src", 0);
  gettimeofday(&end, NULL);
  delta3 = ((end.tv_sec - start.tv_sec)* 1000000u + end.tv_usec - start.tv_usec)/1.e6;

  printf("Time 1: %f\tTime 2: %f\t Time 3: %f\n", delta1, delta2, delta3);
  printf("%d %d\n", rowidx1[0], rowidx2[0]);

  printf("Row Idx3: ");
  for (int i = 0 ; i < rowidx3.Len() ; i++ ) {
    printf("%d ", rowidx3[i]);
  }
  printf("\n");

  /*
  int i = 0;
  for (TRowIterator RowI = wikitable->BegRI(); RowI < wikitable->EndRI(); RowI++) {
    if (i > 100000)
    printf("%d %d %d\n", RowI.GetRowIdx(), RowI.GetStrMapByName("src"), RowI.GetStrMapByName("dst"));
    i++;
//    if (i > 10) break;
  }
  */
}
int main(int argc, char* argv[]) {
  TTableContext Context;
  Schema NetworkS;
  NetworkS.Add(TPair<TStr, TAttrType>("Year", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("Month", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DayOfMonth", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DayOfWeek", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DepTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CRSDepTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("ArrTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CRSArrTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("UniqueCarrier", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("FlightNum", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("TailNum", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("ActualElapsedTime", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("CRSElapsedTime", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("AirTime", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("ArrDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DepDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("Origin", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("Dest", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("Distance", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("TaxiIn", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("TaxiOut", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("Cancelled", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CancellationCode", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("Diverted", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CarrierDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("WeatherDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("NASDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("SecurityDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("LateAircraftDelay", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2);RelevantCols.Add(3); RelevantCols.Add(4); RelevantCols.Add(5);
  RelevantCols.Add(6); RelevantCols.Add(7); RelevantCols.Add(8); RelevantCols.Add(9); RelevantCols.Add(10); RelevantCols.Add(11);
  RelevantCols.Add(12); RelevantCols.Add(13); RelevantCols.Add(14); RelevantCols.Add(15); RelevantCols.Add(16); RelevantCols.Add(17);
  RelevantCols.Add(18); RelevantCols.Add(19); RelevantCols.Add(20); RelevantCols.Add(21); RelevantCols.Add(22); RelevantCols.Add(23);
  RelevantCols.Add(24); RelevantCols.Add(25); RelevantCols.Add(26); RelevantCols.Add(27); RelevantCols.Add(28);
  PTable P = TTable::LoadSS(NetworkS, "table/2007.csv",
  Context, RelevantCols, ',', false);

  TStrV SV;
  TStrV DV;
  TStrV VE;
  double start = omp_get_wtime();
  PNSparseNet G = TSnap::ToNetwork<PNSparseNet>(P, TStr("Origin"),
   TStr("Dest"), SV, DV, VE, aaLast);
  double end = omp_get_wtime();
  printf("Conversion time without attributes %f\n", (end-start));

  start = omp_get_wtime();
  TSnap::AddAttrTable<PNSparseNet>(P, G, TStr("Origin"),
   TStr("Dest"), SV, DV, VE, aaLast);
  end = omp_get_wtime();
  printf("Conversion time with attributes %f\n", (end-start));

  /*(PTable Table, PGraph& Graph, const TStr& SrcCol, const TStr& DstCol, 
  TStrV& SrcAttrV, TStrV& DstAttrV, TStrV& EdgeAttrV, TAttrAggr AggrPolicy, TInt DefaultInt,
  TFlt DefaultFlt, TStr DefaultStr)*/

}
int main(int argc, char** argv) {
    TBool debug = false;
    TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags";
    if(debug) {
        TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";
    }
    Schema TagS;
    TagS.Add(TPair<TStr,TAttrType>("UserId", atInt));
    TagS.Add(TPair<TStr,TAttrType>("Tag", atStr));
    // Load a TTable object and benchmark how long it takes to iterate over all of its records
    TTableContext Context;
    TTable::SetMP(false);

    //float ft0, ft1, ft2, ft3;
    //float mu0, mu1, mu2, mu3;
    float ft_max;
    float mu_max;
    //float tdiff1, tdiff2, dfiff3;
    //float mdiff1, mdiff2, mdiff3;

    //getcpumem(&ft0,&mu0);
    timeval timer0;
    gettimeofday(&timer0, NULL);
    double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
    PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context);
    gettimeofday(&timer0, NULL);
    double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
    printf("Time to load tags table: %f\n", t2 - t1);
    printf("Table Size:\n");
    Tags->PrintSize();
    Tags->PrintContextSize();
    //getcpumem(&ft1,&mu1);
    //tdiff1 = ft1 - ft0;
    //mdiff1 = mu1 - mu0;
    //printf("time: %0.3f seconds, memory: %0.3f MB\n", tdiff1, mdiff1);
    getmaxcpumem(&ft_max, &mu_max);
    printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
    printf("\n");

    timeval timer2;
    gettimeofday(&timer2, NULL);
    t1 = timer2.tv_sec + (timer2.tv_usec/1000000.0);
    printf("start join on user\n");
    PTable TagsJoinUser = Tags->SelfJoin("UserId");
    printf("finish join on user\n");
    gettimeofday(&timer2, NULL);
    t2 = timer2.tv_sec + (timer2.tv_usec/1000000.0);
    printf("Time to join on user id column: %f\n", t2 - t1);
    printf("Table Size:\n");
    TagsJoinUser->PrintSize();
    //getcpumem(&ft2,&mu2);
    if(debug) {
        TagsJoinUser->SaveSS(TagsFnm + "_join_user.tsv");
    }
    //tdiff2 = ft2 - ft1;
    //mdiff2 = mu2 - mu1;
    //printf("time: %0.3f seconds, memory: %0.3f MB\n", tdiff2, mdiff2);
    getmaxcpumem(&ft_max, &mu_max);
    printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
    printf("\n");

    timeval timer3;
    gettimeofday(&timer3, NULL);
    t1 = timer3.tv_sec + (timer3.tv_usec/1000000.0);
    PTable JavaTags = TTable::New(TagS, Context);
    TIntV SelectedRows;
    if(debug) {
        Tags->SelectAtomicConst(TStr("Tag"), TStr("c#"), EQ, SelectedRows, JavaTags, false, true);
    } else {
        Tags->SelectAtomicConst(TStr("Tag"), TStr("java"), EQ, SelectedRows, JavaTags, false, true);
    }
    gettimeofday(&timer3, NULL);
    t2 = timer3.tv_sec + (timer3.tv_usec/1000000.0);
    printf("Time to select java users: %f\n", t2 - t1);
    printf("Table Size:\n");
    JavaTags->PrintSize();
    //getcpumem(&ft3,&mu3);
    if(debug) {
        JavaTags->SaveSS(TagsFnm + "_select.tsv");
    }
    //tdiff3 = ft3 - ft2;
    //mdiff3 = mu3 - mu2;
    //printf("time: %0.3f seconds, memory: %0.3f MB\n", tdiff3, mdiff3);
    getmaxcpumem(&ft_max, &mu_max);
    printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
    printf("\n");
    return 0;
}
Example #20
0
/**
 * Used for benchmarking sorting by source algorithm.
 * Takes as input starting point of
 * a top cascade and outputs time taken for casacade detection. 
 * Input : Source, Dest, Start, Duration 
 * Output : Prints the time for cascade detection
 */
int main(int argc,char* argv[]) {
  TTableContext Context;
  Schema TimeS;
  TimeS.Add(TPair<TStr,TAttrType>("Source",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Dest",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Start",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Duration",atInt));
  PTable P1 = TTable::LoadSS(TimeS,"./../../../../datasets/temporal/yemen_call_201001.txt",&Context,' ');
  TIntV MapV;
  TStrV SortBy;
  SortBy.Add("Source");
  P1->Order(SortBy);
  TIntV Source; // Sorted vec of start time
  P1->ReadIntCol("Source",Source);
  for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) {
    MapV.Add(RI.GetRowIdx());
  }
  // Attribute to Int mapping
  TInt SIdx = P1->GetColIdx("Source");
  TInt DIdx = P1->GetColIdx("Dest");
  TInt StIdx = P1->GetColIdx("Start");
  TInt DuIdx = P1->GetColIdx("Duration");
  int W = atoi(argv[1]);
  int len = 0;
  // Find the starting point
  int TSource = atoi(argv[2]);
  int TDest = atoi(argv[3]);
  int TStart = atoi(argv[4]);
  int TDur = atoi(argv[5]);
  TInt RIdx;
  for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) {
    RIdx = RI.GetRowIdx();
    int RSource = P1->GetIntValAtRowIdx(SIdx,RIdx).Val;
    int RDest = P1->GetIntValAtRowIdx(DIdx,RIdx).Val;
    int RStart = P1->GetIntValAtRowIdx(StIdx,RIdx).Val;
    int RDur = P1->GetIntValAtRowIdx(DuIdx,RIdx).Val;
    if (TSource == RSource && TDest == RDest && TStart == RStart && TDur == RDur) break;
  }
  // Start building the cascade from the start point
  clock_t st,et;
  st = clock();
  for (int i = 0; i < 1; i++) {
    THashSet<TInt> VisitedH;
    TSnapQueue<TInt> EventQ;
    EventQ.Push(RIdx);
    VisitedH.AddKey(RIdx);
    while (!EventQ.Empty()) {
      TInt CIdx = EventQ.Top();
      EventQ.Pop();
      int CDest = P1->GetIntValAtRowIdx(DIdx,CIdx).Val;
      int CStart = P1->GetIntValAtRowIdx(StIdx,CIdx).Val;
      int CDur = P1->GetIntValAtRowIdx(DuIdx,CIdx).Val;
      // In line binary search
      int val = CDest;
      int lo = 0;
      int hi = Source.Len() - 1;
      int index = -1;
      while (hi >= lo) {
        int mid = lo + (hi - lo)/2;
        if (Source.GetVal(mid) > val) { hi = mid - 1;}
        else if (Source.GetVal(mid) < val) { lo = mid + 1;}
        else { index = mid; hi = mid - 1;}
      } 
      // End of binary search
      int BIdx = index;
      for(int i = BIdx; i < Source.Len(); i++) {
        int PId = MapV.GetVal(i).Val;
        if (! VisitedH.IsKey(PId)) {
          int TSource = P1->GetIntValAtRowIdx(SIdx,PId).Val;
          int TStart = P1->GetIntValAtRowIdx(StIdx,PId).Val;
          if (TSource != CDest) {
            break;
          }
          if (TStart >= (CDur + CStart) && TStart - (CDur + CStart) <= W) {
            VisitedH.AddKey(PId);
            EventQ.Push(PId);
          }
        }
      }
    }
    len = VisitedH.Len();
  }
  et = clock();
  float diff = ((float) et - (float) st)/CLOCKS_PER_SEC;
  printf("Size %d,Time %f\n",len,diff);
  return 0;
}