Example #1
0
// Benchmark driver: loads the StackOverflow posts table, times a SUM
// aggregation of "Score" grouped by "OwnerUserId", and optionally saves the
// resulting table.
//
// Usage: <prog> [save]
//   save - integer flag. When it is present and non-zero the table is written
//          to tests/p3.txt after the aggregation; when it is 0 or omitted the
//          program exits right after printing the elapsed time.
int main(int argc, char* argv[]){
  //test1();
  TTableContext Context;

  // create scheme
  Schema PostS;
  PostS.Add(TPair<TStr,TAttrType>("Id", atInt));
  PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr));
  PostS.Add(TPair<TStr,TAttrType>("Score", atInt));

  // All five schema columns are handed to the loader.
  TIntV RelevantCols;
  for (int ColIdx = 0; ColIdx < 5; ColIdx++) { RelevantCols.Add(ColIdx); }

  PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("OwnerUserId");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);
  P->Aggregate(cols, aaSum, "Score", "Sum");
  gettimeofday(&end, NULL);

  // Subtract the fields first and scale in floating point: multiplying the
  // absolute tv_sec by 1000000 overflows where long is 32 bits wide.
  double diff = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  // argv[1] only exists when an argument was actually passed.
  if (argc < 2 || atoi(argv[1]) == 0) return 0;

  P->SaveSS("tests/p3.txt");

  return 0;
}
Example #2
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  // create table
  PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);
  //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
  T->Unique("Animal");
  TTable Ts = *T;  // did we fix problem with copy-c'tor ?
  //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols);
  //Ts->Unique(AnimalUnique);

  // test Select
  // create predicate tree: find all animals that are big and african or medium and Australian
  TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa");  
  TPredicate::TPredicateNode N1(A1);  // Location == "Africa"
  TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big");  
  TPredicate::TPredicateNode N2(A2);  // Size == "big"
  TPredicate::TPredicateNode N3(AND);
  N3.AddLeftChild(&N1);
  N3.AddRightChild(&N2);
  TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia");  
  TPredicate::TPredicateNode N4(A4);  
  TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium");  
  TPredicate::TPredicateNode N5(A5); 
  TPredicate::TPredicateNode N6(AND);
  N6.AddLeftChild(&N4);
  N6.AddRightChild(&N5);
  TPredicate::TPredicateNode N7(OR);
  N7.AddLeftChild(&N3);
  N7.AddRightChild(&N6);
  TPredicate Pred(&N7);
  TIntV SelectedRows;
  Ts.Select(Pred, SelectedRows);

  TStrV GroupBy;
  GroupBy.Add("Location");
  T->Group(GroupBy, "LocationGroup");
  GroupBy.Add("Size");
  T->Group(GroupBy, "LocationSizeGroup");
  T->Count("LocationCount", "Location");
  PTable Tj = T->Join("Location", Ts, "Location");
  TStrV UniqueAnimals;
  UniqueAnimals.Add("Animals_1.Animal");
  UniqueAnimals.Add("Animals_2.Animal");
  Tj->Unique(UniqueAnimals, false);
  //print table
   T->SaveSS("tests/animals_out_T.txt");
   Ts.SaveSS("tests/animals_out_Ts.txt");
   Tj->SaveSS("tests/animals_out_Tj.txt");
  return 0;
}
Example #3
0
// Wall-clock seconds elapsed between two gettimeofday() samples.
static float ElapsedSeconds(const struct timeval& Start, const struct timeval& End) {
  // Subtract each field before scaling so no large value is multiplied in
  // integer arithmetic.
  return static_cast<float>((End.tv_sec - Start.tv_sec) + (End.tv_usec - Start.tv_usec) / 1.e6);
}

// Benchmark driver: loads a binary table and times its conversion to a
// directed graph, reporting both wall-clock time and the profiler's timer.
//
// Usage: <prog> [table-file]
//   table-file - table to load; defaults to the LiveJournal benchmark table.
int main(int argc, char** argv){
  TTableContext Context;

  // Point at the argument instead of copying it into a fixed-size buffer:
  // an strcpy into a 500-byte array overflows on long paths.
  const char* filename = "/dfs/ilfs2/0/ringo/benchmarks/soc-LiveJournal1.table";
  if (argc >= 2){
    filename = argv[1];
  }
  struct timeval start, end;
  float delta;
  TTmProfiler Profiler;
  int TimerId = Profiler.AddTimer("Profiler");

  // Time the table load.
  Profiler.ResetTimer(TimerId);
  Profiler.StartTimer(TimerId);
  gettimeofday(&start, NULL);
  TFIn FIn(filename);
  PTable Q = TTable::Load(FIn, Context);
  Profiler.StopTimer(TimerId);
  gettimeofday(&end, NULL);
  delta = ElapsedSeconds(start, end);
  printf("Load time (elapsed): %f, cpu: %f\n", delta, Profiler.GetTimerSec(TimerId));
  
  // The first two schema columns are used as the edge source and destination.
  TVec<TPair<TStr, TAttrType> > Schema = Q->GetSchema();
  Q->SetSrcCol(Schema[0].GetVal1());
  Q->SetDstCol(Schema[1].GetVal1());

  /* Disabled benchmark: undirected graph conversion.
  Profiler.ResetTimer(TimerId);
  Profiler.StartTimer(TimerId);
  gettimeofday(&start, NULL);
  PUNGraph G1 = Q->ToGraphUndirected(aaFirst);
  Profiler.StopTimer(TimerId);
  gettimeofday(&end, NULL);
  delta = ElapsedSeconds(start, end);
  printf("ToGraphUndirected time (elapsed): %f, cpu: %f\n", delta, Profiler.GetTimerSec(TimerId));*/

  // Time the directed graph conversion.
  Profiler.ResetTimer(TimerId);
  Profiler.StartTimer(TimerId);
  gettimeofday(&start, NULL);
  PNGraph G2 = Q->ToGraphDirected(aaFirst);
  Profiler.StopTimer(TimerId);
  gettimeofday(&end, NULL);
  delta = ElapsedSeconds(start, end);
  printf("ToGraphDirected time (elapsed): %f, cpu: %f\n", delta, Profiler.GetTimerSec(TimerId));

  /* Disabled benchmark: attributed network conversion.
  Profiler.ResetTimer(TimerId);
  Profiler.StartTimer(TimerId);
  gettimeofday(&start, NULL);
  PNEANet G3 = Q->ToGraph(aaFirst);
  Profiler.StopTimer(TimerId);
  gettimeofday(&end, NULL);
  delta = ElapsedSeconds(start, end);
  printf("ToGraph time (elapsed): %f, cpu: %f\n", delta, Profiler.GetTimerSec(TimerId));*/
  return 0;
}
Example #4
0
// Loads a table from a separated-values text file.
//
// TableName    - name given to the new table.
// S            - schema (column name and type pairs) the file must follow.
// InFNm        - path of the input file.
// Separator    - field separator character.
// HasTitleLine - when true, the first line holds the column names and is
//                checked against S instead of being loaded as data.
//
// Returns the populated table. Throws "Table Schema Mismatch!" through TExcept
// when the title line disagrees with S in column count or in a column name.
PTable TTable::LoadSS(const TStr& TableName, const Schema& S, const TStr& InFNm, const char& Separator, TBool HasTitleLine){
  TSsParser Ss(InFNm, Separator);
  PTable T = New(TableName, S);
  // if title line (i.e. names of the columns) is included as first row in the
  // input file - use it to validate schema
  if(HasTitleLine){
    Ss.Next();  
    if(S.Len() != Ss.GetFlds()){TExcept::Throw("Table Schema Mismatch!");}
    for(TInt i = 0; i < Ss.GetFlds(); i++){
      // remove carriage return char (or any other trailing control char);
      // an empty field has no last character, so it must be left alone
      TInt L = strlen(Ss[i]);
      if(L > 0 && Ss[i][L-1] < ' '){ Ss[i][L-1] = 0;}
      if(T->GetSchemaColName(i) != Ss[i]){ TExcept::Throw("Table Schema Mismatch!");}
    }
  }
  // cache the column types so the per-row loop does not query the schema
  TInt RowLen = S.Len();
  TVec<TYPE> ColTypes = TVec<TYPE>(RowLen);
  for(TInt i = 0; i < RowLen; i++){
    ColTypes[i] = T->GetSchemaColType(i);
  }
  // populate table columns
  while(Ss.Next()){
    // each storage type keeps its own running column index within the row
    TInt IntColIdx = 0;
    TInt FltColIdx = 0;
    TInt StrColIdx = 0;
    Assert(Ss.GetFlds() == RowLen); // compiled only in debug
    for(TInt i = 0; i < RowLen; i++){
      switch(ColTypes[i]){
        case INT:
          T->IntCols[IntColIdx].Add(Ss.GetInt(i));
          IntColIdx++;
          break;
        case FLT:
          T->FltCols[FltColIdx].Add(Ss.GetFlt(i));
          FltColIdx++;
          break;
        case STR:
          T->AddStrVal(StrColIdx, Ss[i]);
          StrColIdx++;
          break;
      }
    }
  }

  // set number of rows and "Next" vector
  // NOTE(review): the row count is derived from the parser's line number
  // rather than counted in the loop above - confirm this matches files with
  // or without a trailing newline.
  T->NumRows = Ss.GetLineNo()-1;
  if(HasTitleLine){T->NumRows--;}
  T->NumValidRows = T->NumRows;
  // Next[i] is the index of the row following row i; the final entry is Last.
  // Presumably TIntV(n, 0) reserves room for n entries and starts empty.
  T->Next = TIntV(T->NumRows,0);
  for(TInt i = 0; i < T->NumRows-1; i++){
    T->Next.Add(i+1);
  }
  T->Next.Add(Last);
  return T;
}
Example #5
0
int main(){
  TTableContext Context;
  // Case 1: Euclidean Distance
  Schema BuildingS;
  BuildingS.Add(TPair<TStr,TAttrType>("Building", atStr));
  BuildingS.Add(TPair<TStr,TAttrType>("X", atInt));
  BuildingS.Add(TPair<TStr,TAttrType>("Y", atInt));

  // create table
  PTable TBuildings = TTable::LoadSS("Buildings", BuildingS, "tests/buildings.txt", Context, '\t', false);

	TStrV Cols;
	Cols.Add("X");
	Cols.Add("Y");

	// Find all buildings within 5 Euc Distance of each other.
	PTable BuildingJointTable = TBuildings->SelfSimJoin(Cols, "Euclidean_Distance", L2Norm, 5.0);
	BuildingJointTable->SaveSS("tests/buildings.out.txt");

  // Case2 : Haversine distance 
  Schema PlaceS;
  PlaceS.Add(TPair<TStr,TAttrType>("Name", atStr));
  PlaceS.Add(TPair<TStr,TAttrType>("Location", atStr));
  PlaceS.Add(TPair<TStr,TAttrType>("Latitude", atFlt));
  PlaceS.Add(TPair<TStr,TAttrType>("Longitude", atFlt));

  // create table
  PTable TPlaces = TTable::LoadSS("Places", PlaceS, "tests/places.txt", Context, '\t', false);

	Cols.Clr();
	Cols.Add("Latitude");
	Cols.Add("Longitude");

	PTable PlacesJointTable = TPlaces->SelfSimJoin(Cols, "Distance",Haversine, 1000.0);

	TStrV ProjectionV;
	ProjectionV.Add("Places_1.Name");
	ProjectionV.Add("Places_1.Location");	
	ProjectionV.Add("Places_2.Name");
	ProjectionV.Add("Places_2.Location");
	ProjectionV.Add("Distance");
	PlacesJointTable->ProjectInPlace(ProjectionV);
	PlacesJointTable->SelectAtomic("Places_1.Name", "Places_2.Name", NEQ);
	PlacesJointTable->SaveSS("tests/places.out.txt");

	printf("Saved buildings.out.txt and places.out.txt\n");
  return 0;
}
// Registers a new entity category and (re)creates its backing tables.
//
// Name              - ID of the category.
// Desc              - category description; forwarded to CreateEntityCat and
//                     stored under Name in data:tables/EntityCats.hrd.
// CreateInstDataset - when true, a dataset over the instance table is also
//                     created and stored in the category.
//
// Any existing template table (static DB) or instance table (game DB) with
// the category's table name is deleted before the new one is added.
void CEntityFactory::CreateNewEntityCat(CStrID Name, const Data::PParams& Desc, bool CreateInstDataset)
{
	CreateEntityCat(Name, Desc);

	CEntityCat& Cat = Categories[Name];

	// Persist the description alongside the other categories.
	PParams CatsDesc = DataSrv->LoadHRD("data:tables/EntityCats.hrd");
	CatsDesc->Set(Name, Desc);
	DataSrv->SaveHRD("data:tables/EntityCats.hrd", CatsDesc);

	// Template table: starts with the GUID primary key only.
	// All other tpl fields are added on demand by editor
	// Template creation is available only in editor
	if (LoaderSrv->GetStaticDB()->HasTable(Cat.TplTableName))
	{
		LoaderSrv->GetStaticDB()->DeleteTable(Cat.TplTableName);
	}

	PTable TplTbl = DB::CTable::Create();
	TplTbl->SetName(Cat.TplTableName);
	TplTbl->AddColumn(CColumn(Attr::GUID, CColumn::Primary));
	LoaderSrv->GetStaticDB()->AddTable(TplTbl);

	Cat.TplDataset = TplTbl->CreateDataset();
	Cat.TplDataset->AddColumnsFromTable();

	// Instance table: GUID primary key, indexed LevelID, then one column per
	// category attribute.
	if (LoaderSrv->GetGameDB()->HasTable(Cat.InstTableName))
	{
		LoaderSrv->GetGameDB()->DeleteTable(Cat.InstTableName);
	}

	PTable InstTbl = DB::CTable::Create();
	InstTbl->SetName(Cat.InstTableName);
	InstTbl->AddColumn(CColumn(Attr::GUID, CColumn::Primary));
	InstTbl->AddColumn(CColumn(Attr::LevelID, CColumn::Indexed));
	for (nArray<CAttrID>::iterator AttrIt = Cat.Attrs.Begin(); AttrIt != Cat.Attrs.End(); ++AttrIt)
	{
		InstTbl->AddColumn(*AttrIt);
	}
	LoaderSrv->GetGameDB()->AddTable(InstTbl);

	if (!CreateInstDataset) return;

	// The dataset is built from the table as registered in the game DB.
	Cat.InstDataset = LoaderSrv->GetGameDB()->GetTable(Cat.InstTableName)->CreateDataset();
	Cat.InstDataset->AddColumnsFromTable();
}
Example #7
0
 // Creates the empty result table for a join of this table with Table.
 //
 // The result is named "<this name>_<other name>" and gets one column slot
 // per column of either input. Every column is registered under the qualified
 // name "<table name>.<column name>"; columns coming from Table have their
 // per-type column index shifted past this table's columns of the same type.
 PTable TTable::InitializeJointTable(const TTable& Table){
  PTable Result = New();
  Result->Name = Name + "_" + Table.Name;
  Result->IntCols = TVec<TIntV>(IntCols.Len() + Table.IntCols.Len());
  Result->FltCols = TVec<TFltV>(FltCols.Len() + Table.FltCols.Len());
  Result->StrColMaps = TVec<TIntV>(StrColMaps.Len() + Table.StrColMaps.Len());

  // Columns of this table keep their (type, index) entry unchanged.
  for(TInt LeftIdx = 0; LeftIdx < S.Len(); LeftIdx++){
    TStr ColName = GetSchemaColName(LeftIdx);
    TYPE ColType = GetSchemaColType(LeftIdx);
    TStr Qualified = Name + "." + ColName;
    Result->ColTypeMap.AddDat(Qualified, ColTypeMap.GetDat(ColName));
    Result->AddLabel(Qualified, ColName);
    Result->AddSchemaCol(Qualified, ColType);
  }

  // Columns of the other table are appended after this table's columns.
  for(TInt RightIdx = 0; RightIdx < Table.S.Len(); RightIdx++){
    TStr ColName = Table.GetSchemaColName(RightIdx);
    TYPE ColType = Table.GetSchemaColType(RightIdx);
    TStr Qualified = Table.Name + "." + ColName;
    TPair<TYPE, TInt> Entry = Table.ColTypeMap.GetDat(ColName);
    Assert(ColType == Entry.Val1);
    // Offset the index by the number of same-typed columns in this table.
    if(Entry.Val1 == INT){
      Entry.Val2 += IntCols.Len();
    } else if(Entry.Val1 == FLT){
      Entry.Val2 += FltCols.Len();
    } else if(Entry.Val1 == STR){
      Entry.Val2 += StrColMaps.Len();
    }
    Result->ColTypeMap.AddDat(Qualified, Entry);
    Result->AddLabel(Qualified, ColName);
    Result->AddSchemaCol(Qualified, ColType);
  }
  return Result;
 }
Example #8
0
// Loads the same small edge list twice, checks the row counts of both loads,
// and checks the row count of joining one table's "Src" with the other's "Dst".
TEST(TTable, ParallelJoin) {
  TTableContext Context;

  // Two integer columns: edge source and destination.
  Schema LJS;
  LJS.Add(TPair<TStr,TAttrType>("Src", atInt));
  LJS.Add(TPair<TStr,TAttrType>("Dst", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);

  PTable Left = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);
  EXPECT_EQ(499, Left->GetNumRows().Val);
  EXPECT_EQ(499, Left->GetNumValidRows().Val); 

  PTable Right = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols);
  EXPECT_EQ(499, Right->GetNumRows().Val);
  EXPECT_EQ(499, Right->GetNumValidRows().Val); 

  PTable Joined = Left->Join("Src", Right, "Dst");
  EXPECT_EQ(24, Joined->GetNumRows().Val);
  EXPECT_EQ(24, Joined->GetNumValidRows().Val); 
}
Example #9
0
// Loads tests/s.txt with the animals schema, times a Group on
// ("Size", "Number"), prints the elapsed time and saves the table to
// tests/p3.txt.
void test1() {
  TTableContext Context;

  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);

  PTable P = TTable::LoadSS("Animals", AnimalS, "tests/s.txt", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("Size");
  cols.Add("Number");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);

  //P->Unique(cols);
  P->Group(cols, "SizeNumberGroup");

  gettimeofday(&end, NULL);

  // Subtract the fields first and scale in floating point: multiplying the
  // absolute tv_sec by 1000000 overflows where long is 32 bits wide.
  double diff = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  P->SaveSS("tests/p3.txt");

}
Example #10
0
// Round-trip test: loads the grades table from text, checks row counts and a
// few cell values, saves it, then reloads the saved file and repeats the
// same checks.
TEST(TTable, LoadSave) {
  TTableContext Context;

  // Three string columns followed by three integer grade columns.
  Schema GradeS;
  GradeS.Add(TPair<TStr,TAttrType>("Class", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Area", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Quarter", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2011", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2012", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2013", atInt));

  // Column indices 0..5 are handed to the loader.
  TIntV RelevantCols;
  for (int ColIdx = 0; ColIdx < 6; ColIdx++) { RelevantCols.Add(ColIdx); }

  PTable Grades = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols);

  EXPECT_EQ(5, Grades->GetNumRows().Val);
  EXPECT_EQ(5, Grades->GetNumValidRows().Val); 

  EXPECT_EQ(7, Grades->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(9, Grades->GetIntVal("Grade 2013", 4).Val);
  EXPECT_STREQ("Compilers", Grades->GetStrVal("Class", 3).CStr());

  Grades->SaveSS("table/p1.txt");

  // Reload what SaveSS wrote; the schema is extended with an "_id" integer
  // column before the saved file is read back.
  GradeS.Add(TPair<TStr,TAttrType>("_id", atInt));
  Grades = TTable::LoadSS(GradeS, "table/p1.txt", &Context, RelevantCols);

  EXPECT_EQ(5, Grades->GetNumRows().Val);
  EXPECT_EQ(5, Grades->GetNumValidRows().Val); 

  EXPECT_EQ(7, Grades->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(9, Grades->GetIntVal("Grade 2013", 4).Val);
  EXPECT_STREQ("Compilers", Grades->GetStrVal("Class", 3).CStr());
}
Example #11
0
int main(int argc, char** argv){
  TBool debug = false;
  TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags_200000";
  if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";}
  Schema TagS; 
  TagS.Add(TPair<TStr,TAttrType>("UserId", atInt));
  TTableContext Context;
  TTable::SetMP(false);
	
  float ft_max;
  float mu_max;
	
  timeval timer0;
  gettimeofday(&timer0, NULL);
  double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
  PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context);
  gettimeofday(&timer0, NULL);
  double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
  printf("Time to load tags table: %f\n", t2 - t1);
  printf("Table Size:\n");
  Tags->PrintSize();
  Tags->PrintContextSize();
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  timeval timer1;
  gettimeofday(&timer1, NULL);
  t1 = timer1.tv_sec + (timer1.tv_usec/1000000.0);
  PTable TagsJoinTag = Tags->SelfJoin("Tag");
  gettimeofday(&timer1, NULL);
  t2 = timer1.tv_sec + (timer1.tv_usec/1000000.0);
  printf("Time to join on tags column: %f\n", t2 - t1);
  printf("Table Size:\n");
  TagsJoinTag->PrintSize();
  if(debug){ TagsJoinTag->SaveSS(TagsFnm + "_join_tag.tsv");}
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  return 0;
}
Example #12
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);

  PTable P = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);

  P->SaveSS("tests/p1.txt");

  TStrV cols;
  cols.Add("Size");
  cols.Add("Number");

  TVec<PTable> R = P->SpliceByGroup(cols);
  for (TInt i = 0; i < R.Len(); i++) {
    TStr fn = i.GetStr();
    R[i]->SaveSS("tests/sznumber" + fn + ".txt");
  }

  P->Unique(cols, true);

  P->SaveSS("tests/p2.txt");

  TStrV group1;
  group1.Add("Location");
  P->Group(group1, "LocationGroup");

  P->SaveSS("tests/p3.txt");

  return 0;
}
Example #13
0
// Builds a PNGraph from table P, using the schema columns at positions
// index_col_1 and index_col_2 as the two column names passed to
// TSnap::ToGraph (with the aaFirst policy).
// The indices are not range-checked here.
PNGraph GetBiGraph(PTable P, int index_col_1, int index_col_2) {
  TVec<TPair<TStr, TAttrType>, int > TableSchema = P->GetSchema();
  TStr FirstCol = TableSchema[index_col_1].GetVal1();
  TStr SecondCol = TableSchema[index_col_2].GetVal1();
  return TSnap::ToGraph<PNGraph>(P, FirstCol, SecondCol, aaFirst);
}
Example #14
0
// Equi-joins this table with Table on this->Col1 == Table.Col2.
//
// Col1  - join column of this table.
// Table - the other table.
// Col2  - join column of Table.
//
// Returns a new table (see InitializeJointTable) holding one row per matching
// pair of rows. Throws through TExcept when Col1 is not a column of this
// table, Col2 is not a column of Table, or the two columns differ in type.
//
// The table with fewer valid rows is hashed on its join column; the rows of
// the other table are then probed against that hash.
//
// Q: Do we want to have any guarantees in terms of order of the output rows -
// i.e. ordered by "this" table row idx as primary key and "Table" row idx as
// secondary key?
// This means only keeping joint row indices (pairs of original row indices),
// sorting them and adding all rows in the end. Sorting can be expensive, but
// we would be able to pre-allocate memory for the joint table..
PTable TTable::Join(TStr Col1, const TTable& Table, TStr Col2) {
  if(!ColTypeMap.IsKey(Col1)){
    TExcept::Throw("no such column " + Col1);
  }
  // Col2 names a column of the other table, so it must be looked up there.
  if(!Table.ColTypeMap.IsKey(Col2)){
    TExcept::Throw("no such column " + Col2);
  }
  // The first member of a ColTypeMap entry is the column's type.
  TYPE ColType = GetColType(Col1);
  if (ColType != Table.ColTypeMap.GetDat(Col2).Val1) {
    TExcept::Throw("Trying to Join on columns of different type");
  }
  // initialize result table
  PTable JointTable = InitializeJointTable(Table);
  // hash smaller table (group by column)
  TBool ThisIsSmaller = (NumValidRows <= Table.NumValidRows);
  const TTable& TS = ThisIsSmaller ? *this : Table;
  const TTable& TB = ThisIsSmaller ?  Table : *this;
  // ColS / ColB are the join columns of the smaller / bigger table.
  TStr ColS = ThisIsSmaller ? Col1 : Col2;
  TStr ColB = ThisIsSmaller ? Col2 : Col1;
  // iterate over the rows of the bigger table and check for "collisions" 
  // with the group keys for the small table.
  // AddJointRow always takes (this row, Table row), so the argument order
  // depends on which of the two tables was hashed.
  switch(ColType){
    case INT:{
      THash<TInt, TIntV> Groups;
      TS.GroupByIntCol(ColS, Groups, TIntV(), true);
      for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){
        TInt K = RowI.GetIntAttr(ColB);
        if(Groups.IsKey(K)){
          TIntV& Group = Groups.GetDat(K);
          for(TInt i = 0; i < Group.Len(); i++){
            if(ThisIsSmaller){
              JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
            } else{
              JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
            }
          }
        }
      }
      break;
    }
    case FLT:{
      THash<TFlt, TIntV> Groups;
      TS.GroupByFltCol(ColS, Groups, TIntV(), true);
      for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){
        TFlt K = RowI.GetFltAttr(ColB);
        if(Groups.IsKey(K)){
          TIntV& Group = Groups.GetDat(K);
          for(TInt i = 0; i < Group.Len(); i++){
            if(ThisIsSmaller){
              JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
            } else{
              JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
            }
          }
        }
      }
      break;
    }
    case STR:{
      THash<TStr, TIntV> Groups;
      TS.GroupByStrCol(ColS, Groups, TIntV(), true);
      for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){
        TStr K = RowI.GetStrAttr(ColB);
        if(Groups.IsKey(K)){
          TIntV& Group = Groups.GetDat(K);
          for(TInt i = 0; i < Group.Len(); i++){
            if(ThisIsSmaller){
              JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
            } else{
              JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
            }
          }
        }
      }
      break;
    }
  }
  return JointTable; 
}
Example #15
0
// Weighted PageRank benchmark over the Flickr tables.
//
// Usage: <prog> [prefix-path]
//   prefix-path - passed to LoadFlickrTables and prepended to the two output
//                 file names; defaults to the empty string.
//
// Stages (each delimited by a Tick() sample): load tables, merge node and
// edge tables, convert the edge table to a PNEANet, run weighted PageRank
// nExps times, write the results. The stage timings are written to
// <prefix>time.txt and the ranks to <prefix>page-rank-results.tsv.
int main(int argc, char* argv[])
{
  TEnv Env(argc, argv);
  TStr PrefixPath = Env.GetArgs() > 1 ? Env.GetArg(1) : TStr("");

  // Stage 1: input.
  double ts1 = Tick();
  TTableContext Context;
  TVec<PTable> NodeTblV = TVec<PTable>();
  TVec<PTable> EdgeTblV = TVec<PTable>();
  Schema NodeSchema = Schema();
  Schema EdgeSchema = Schema();
  LoadFlickrTables(PrefixPath, Context, NodeTblV, NodeSchema, EdgeTblV, EdgeSchema);

  // Stage 2: preprocessing (merging the node and edge tables).
  double ts2 = Tick();

  // Total row count over all node tables; used to size the containers below.
  int ExpectedSz = 0;
  for (TVec<PTable>::TIter it = NodeTblV.BegI(); it < NodeTblV.EndI(); it++) {
    PTable Table = *it;
    ExpectedSz += Table->GetNumRows();
  }

  THash<TStr, TInt> Hash(ExpectedSz);
  // Indexed by the PageRank result keys when the output is written below.
  TStrV OriNIdV(ExpectedSz);

  MergeNodeTables(NodeTblV, NodeSchema, Hash, OriNIdV);
  PTable EdgeTable = MergeEdgeTables(EdgeTblV, EdgeSchema, Hash, Context);

  // Stage 3: conversion. Edge schema columns 0 and 1 are passed as the two
  // endpoint columns; column 2 is the only entry of the VE attribute list and
  // is also the attribute name given to PageRank below.
  double ts3 = Tick();
  TStrV V;
  TStrV VE;
  VE.Add(EdgeSchema.GetVal(2).GetVal1());
  PNEANet Graph = TSnap::ToNetwork<PNEANet>(EdgeTable, EdgeSchema.GetVal(0).GetVal1(), EdgeSchema.GetVal(1).GetVal1(),
						V, V, VE, aaLast);
  // Stage 4: computing. The run is repeated nExps times and the reported
  // time is the average; only the last run's results are kept.
  double ts4 = Tick();

  //int nExps = 1;
  int nExps = 40;
  TIntFltH PageRankResults;
  for (int i = 0; i < nExps; i++) {
    PageRankResults = TIntFltH(ExpectedSz);
#ifdef USE_OPENMP
    TSnap::GetWeightedPageRankMP2(Graph, PageRankResults, EdgeSchema.GetVal(2).GetVal1(), 0.849999999999998, 0.0001, 10);
#else
    TSnap::GetWeightedPageRank(Graph, PageRankResults, EdgeSchema.GetVal(2).GetVal1(), 0.849999999999998, 0.0001, 10);
#endif
  }
  // Stage 5: output.
  double ts5 = Tick();

  PSOut ResultOut = TFOut::New(PrefixPath + TStr("page-rank-results.tsv"));
  for (TIntFltH::TIter it = PageRankResults.BegI(); it < PageRankResults.EndI(); it++) {
    // NOTE(review): "%f9" prints the value with %f followed by a literal '9';
    // this looks like a typo for "%9f" or "%.9f" - confirm the intended format.
    ResultOut->PutStrFmtLn("%s\t%f9", OriNIdV[it.GetKey()].CStr(), it.GetDat().Val);
  }
  double ts6 = Tick();

  // Records which PageRank variant was compiled in, for the report header.
  bool isPar = false;
#ifdef USE_OPENMP
  isPar = true;
#endif

//  PSOut FeaturesOut = TFOut::New(PrefixPath + "features.txt");
//  FeaturesOut->PutStrFmtLn("Photo %d", PPhotoTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Users %d", PUserTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Tags %d", PTagTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Comments %d", PCommentTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Locations %d", PLocationTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Photo - Owner %d", PPhotoOwnerTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Photo - Comment %d", PPhotoCommentTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Photo - Location %d", PPhotoLocationTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Comment - User %d", PCommentUserTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Comment - User %d", PCommentUserTbl->GetNumRows().Val);
////  FeaturesOut->PutStrFmtLn("Photo - Tagger %d", PPhotoTaggerTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Tagger - Tag %d", PTaggerTagTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Total number of nodes = %d", Graph->GetNodes());
//  FeaturesOut->PutStrFmtLn("Total number of edges = %d", Graph->GetEdges());

  // Timing report. NOTE(review): the second argument to TFOut::New is
  // presumably an append flag - confirm against the TFOut declaration.
  PSOut TimeOut = TFOut::New(PrefixPath + TStr("time.txt"), true);
  TimeOut->PutStrFmtLn("Experiment Weighted - %s - %s", PrefixPath.CStr(), (isPar ? "Parallel" : "Sequential"));
  TimeOut->PutStrFmtLn("Input Time = %f", GetCPUTimeUsage(ts1, ts2));
  TimeOut->PutStrFmtLn("Preprocessing Time = %f", GetCPUTimeUsage(ts2, ts3));
  TimeOut->PutStrFmtLn("Conversion Time = %f", GetCPUTimeUsage(ts3, ts4));
  TimeOut->PutStrFmtLn("Computing Time = %f", GetCPUTimeUsage(ts4, ts5)/nExps);
  TimeOut->PutStrFmtLn("Output Time = %f", GetCPUTimeUsage(ts5, ts6));

  return 0;
}
Example #16
0
int main(){
  // create scheme
  TTable::Schema AnimalS;
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Animal", TTable::STR));
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Size", TTable::STR));
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Location", TTable::STR));
  AnimalS.Add(TPair<TStr,TTable::TYPE>("Number", TTable::INT));
  // create table
  PTable T1 = TTable::LoadSS("Animals1", AnimalS, "animals.txt");
  PTable T2 = TTable::LoadSS("Animals2", AnimalS, "animals.txt");
  // test Select
  // create predicate tree: find all animals that are big and african or medium and Australian
  TPredicate::TAtomicPredicate A1(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "big");  
  TPredicate::TPredicateNode N1(A1);  // Size == "big"
  TPredicate Pred(&N1);
  T1->Select(Pred);
  T1->SaveSS("animals_out_T1.txt");
  PTable Tj = T1->Join("Location", *T2, "Location");
  TStrV GroupBy;
  GroupBy.Add("Animals1.Animal");
  GroupBy.Add("Animals2.Animal");
  Tj->Group("AnimalPair", GroupBy);
  Tj->Unique("AnimalPair");
  //print table
  Tj->SaveSS("animals_out_Tj_1.txt");

  // Join on Location to get animal pairs
  // select the animal pairs of animals of the same size
  // group by (Animal, Animal)
  // unique by group idx
  /*
  PTable T3 = TTable::LoadSS("Animals3", AnimalS, "../../testfiles/animals.txt");
  //PTable T4 = TTable::LoadSS("Animals4", AnimalS, "../../testfiles/animals.txt");
  TTable T4 = *T3;
  T4.Name = "Animals4";
  PTable To = T3->Join("Location", T4, "Location");
  TPredicate::TAtomicPredicate A2(TPredicate::STR, false, TPredicate::EQ, "Animals3.Size", "Animals4.Size");  
  TPredicate::TPredicateNode N2(A2);
  TPredicate Pred2(&N2);
  To->Select(Pred2);
  TStrV GroupBy1;
  GroupBy1.Add("Animals3.Animal");
  GroupBy1.Add("Animals4.Animal");
  To->Group("AnimalPair", GroupBy1);
  To->Unique("AnimalPair");
  //print table
  To->SaveSS("../../testfiles/animals_out_To_1.txt");
  return 0;
  */

  PTable T3 = TTable::LoadSS("Animals3", AnimalS, "animals.txt");
  PTable T4 = TTable::LoadSS("Animals4", AnimalS, "animals.txt");
  PTable To = T3->Join("Location", *T4, "Location");
  TPredicate::TAtomicPredicate A2(TPredicate::STR, false, TPredicate::EQ, "Animals3.Size", "Animals4.Size");  
  TPredicate::TPredicateNode N2(A2);
  TPredicate Pred2(&N2);
  To->Select(Pred2);
  TStrV GroupBy1;
  GroupBy1.Add("Animals3.Animal");
  GroupBy1.Add("Animals4.Animal");
  To->Group("AnimalPair", GroupBy1);
  To->Unique("AnimalPair");
  //print table
  To->SaveSS("animals_out_To_1.txt");
  return 0;
}
Example #17
0
// Interactive benchmark driver for sub-graph extraction on a typed network.
//
// Reads from stdin the number of node and edge tables, loads each table
// (AddNodeTable / AddEdgeTable), and for every edge table the indices of its
// source and destination node tables. It then builds a TSVNetMP from the
// tables and runs GetSubGraphMP ten times for each of three fixed node-type
// selections. Progress markers and PrintBenchmarks output go to
// benchmark.txt; a summary of node/edge counts goes to stdout.
//
// Returns 0 on success, 1 when benchmark.txt cannot be opened or the input
// read from stdin is malformed or out of range.
int main(int argc, char* []) {
  FILE* outfile = fopen("benchmark.txt", "w");
  if (outfile == NULL) {
    // Every later stage logs into this file, so there is no point continuing.
    perror("benchmark.txt");
    return 1;
  }

  fprintf(outfile, "Hello World!\n");
  PrintBenchmarks(outfile);

  int NTblCnt = 0;
  int ETblCnt = 0;
  printf("Enter the number of node tables and edge tables\n");
  // Without this check a failed read would leave both counts uninitialised.
  if (scanf("%d %d", &NTblCnt, &ETblCnt) != 2 || NTblCnt < 0 || ETblCnt < 0) {
    fprintf(stderr, "Expected two non-negative integers (node table count, edge table count)\n");
    fclose(outfile);
    return 1;
  }

  TVec<PTable> NTables;
  TVec<PTable> ETables;
  // Per edge table: (source node table index, destination node table index).
  TVec<TPair<TInt, TInt> > Mapping;

  TTableContext Context;
  // Read in tables
  for (int i = 0; i < NTblCnt; i++) {
    PTable P = AddNodeTable(Context);
    P->SaveSS("ntable_abc.txt");
    NTables.Add(P);
  }
  for (int i = 0; i < ETblCnt; i++) {
    ETables.Add(AddEdgeTable(Context));
    // Edge weights are currently not read.
    printf("Enter the source and destination node table number (index starting from 0)\n");
    int SrcId = -1;
    int DestId = -1;
    // Both indices are later used to index NTypeV, so they must name an
    // existing node table.
    if (scanf("%d %d", &SrcId, &DestId) != 2 ||
        SrcId < 0 || SrcId >= NTblCnt || DestId < 0 || DestId >= NTblCnt) {
      fprintf(stderr, "Invalid source/destination node table number\n");
      fclose(outfile);
      return 1;
    }
    printf("%d %d\n", SrcId, DestId);
    Mapping.Add(TPair<TInt, TInt>(SrcId, DestId));
  }

  // (Reading a list of BFS start nodes from a file is currently disabled.)

  fprintf(outfile, "Tables Loaded\n");
  PrintBenchmarks(outfile);
  printf("Tables Loaded\n");

  // Convert to Graph
  printf("Converting to Graph\n");
  TSVNetMP Graph;
  TIntV NTypeV;                      // node type id per node table
  TIntV ETypeV;                      // edge type id per edge table
  TVec<THash<TStr, TInt> > NodesHV;  // per node table: "NodeID" string -> graph node id
  // Adding node types
  for (int i = 0; i < NTblCnt; i++) {
    NTypeV.Add(Graph.AddNType());
  }
  printf("a\n");
  // Adding edge types
  for (int i = 0; i < ETblCnt; i++) {
    ETypeV.Add(Graph.AddEType(NTypeV[Mapping[i].Val1], NTypeV[Mapping[i].Val2]));
  }
  printf("a\n");
  // Adding nodes
  for (int i = 0; i < NTblCnt; i++) {
    THash<TStr, TInt> NodesH;
    NodesHV.Add(NodesH);
    int NType = NTypeV[i];
    Graph.ReserveNodes(i, NTables[i]->GetNumRows());
    for (int j = 0; j < NTables[i]->GetNumRows(); j++) {
      NodesHV[i].AddDat(NTables[i]->GetStrVal("NodeID", j), Graph.AddNode(NType));
    }
    Graph.SetNCnt(i, NTables[i]->GetNumRows());
  }
  // Adding edges
  for (int i = 0; i < ETblCnt; i++) {
    int EType = ETypeV[i];
    int SrcNType = NTypeV[Mapping[i].Val1];
    int DstNType = NTypeV[Mapping[i].Val2];
    Graph.ReserveEdges(i, ETables[i]->GetNumRows());
    for (int j = 0; j < ETables[i]->GetNumRows(); j++) {
      // NOTE(review): NodesHV is filled per node table index but read here by
      // node type id; this assumes AddNType hands out ids 0, 1, 2, ... in
      // call order - confirm.
      int SrcNId = NodesHV[SrcNType].GetDat(ETables[i]->GetStrVal("SrcID", j));
      int DstNId = NodesHV[DstNType].GetDat(ETables[i]->GetStrVal("DstID", j));
      Graph.AddEdge(SrcNId, DstNId, EType);
    }
    Graph.SetECnt(i, ETables[i]->GetNumRows());
  }

  // Three fixed selections passed to GetSubGraphMP. The edge vectors are
  // intentionally left empty.
  // NOTE(review): the hard-coded values go up to 4, which presumably requires
  // at least five node tables - confirm.
  TIntV EdgeVec1;
  TIntV NodeVec1;
  NodeVec1.Add(2);
  NodeVec1.Add(4);

  TIntV EdgeVec2;
  TIntV NodeVec2;
  NodeVec2.Add(2);
  NodeVec2.Add(3);
  NodeVec2.Add(4);

  TIntV EdgeVec3;
  TIntV NodeVec3;
  NodeVec3.Add(0);
  NodeVec3.Add(2);
  NodeVec3.Add(3);
  NodeVec3.Add(4);

  int iter_count = 10;
  fprintf(outfile, "Converted to Graph\n");
  PrintBenchmarks(outfile);
  printf("Converted to Graph\n");
  printf("Starting subgraph\n");

  PSVNetMP Graph1;
  PSVNetMP Graph2;
  PSVNetMP Graph3;

  // Each selection is extracted iter_count times; only the last result of
  // each is kept for the summary below.
  Graph.InitializeTimeV(4);
  for (int iter = 0; iter < iter_count; iter++) {
    Graph1 = Graph.GetSubGraphMP(NodeVec1, EdgeVec1);
  }

  fprintf(outfile, "subgraph completed\n");
  PrintBenchmarks(outfile);
  Graph.PrintTimeV(outfile);
  printf("subgraph completed\n");

  Graph.InitializeTimeV(4);
  for (int iter = 0; iter < iter_count; iter++) {
    Graph2 = Graph.GetSubGraphMP(NodeVec2, EdgeVec2);
  }

  fprintf(outfile, "subgraph completed\n");
  PrintBenchmarks(outfile);
  Graph.PrintTimeV(outfile);
  printf("subgraph completed\n");

  Graph.InitializeTimeV(4);
  for (int iter = 0; iter < iter_count; iter++) {
    Graph3 = Graph.GetSubGraphMP(NodeVec3, EdgeVec3);
  }

  fprintf(outfile, "subgraph completed\n");
  PrintBenchmarks(outfile);
  Graph.PrintTimeV(outfile);
  printf("subgraph completed\n");

  printf("Original: Nodes = %d, edges = %d\n", Graph.GetNodes(), Graph.GetEdges());
  printf("Subgraph1: Nodes = %d, edges = %d\n", Graph1->GetNodes(), Graph1->GetEdges());
  printf("Subgraph2: Nodes = %d, edges = %d\n", Graph2->GetNodes(), Graph2->GetEdges());
  printf("Subgraph3: Nodes = %d, edges = %d\n", Graph3->GetNodes(), Graph3->GetEdges());

  // (Writing per-node BFS levels to output_bfs.txt is currently disabled; the
  // marker below is still emitted so the benchmark log keeps its shape.)
  fprintf(outfile, "Output stored\n");
  PrintBenchmarks(outfile);
  printf("Output stored\n");

  fclose(outfile);
  return 0;
}
Example #18
0
// Demo driver for the table API on the animals data set. It loads
// animals.txt twice, runs Unique on both tables, filters the second table
// with a composite predicate, groups/counts the first one, joins the two on
// "Location" and saves all three tables as animals_out_*.txt.
int main() {
    // Schema used for both loads: three string columns and one integer column.
    TTable::Schema AnimalS;
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Animal", TTable::STR));
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Size", TTable::STR));
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Location", TTable::STR));
    AnimalS.Add(TPair<TStr,TTable::TYPE>("Number", TTable::INT));

    // First table: T.
    PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
    T->Unique("Animal");

    // Second table: Ts. It is loaded from the file again instead of being
    // copied from T ("TTable Ts = *T;") because the copy constructor was
    // not working when this was written.
    PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "animals.txt");
    Ts->Unique("Animal");

    // Predicate for Select: keep animals that are big and African, or
    // medium and Australian, i.e.
    //   (Location == "Africa"    AND Size == "big") OR
    //   (Location == "Australia" AND Size == "medium")

    // 1) the four atomic string comparisons
    TPredicate::TAtomicPredicate A1(TPredicate::STR, true, TPredicate::EQ, "Location", "", 0, 0, "Africa");
    TPredicate::TAtomicPredicate A2(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "big");
    TPredicate::TAtomicPredicate A4(TPredicate::STR, true, TPredicate::EQ, "Location", "", 0, 0, "Australia");
    TPredicate::TAtomicPredicate A5(TPredicate::STR, true, TPredicate::EQ, "Size", "", 0, 0, "medium");

    // 2) one leaf node per atomic comparison
    TPredicate::TPredicateNode N1(A1);  // Location == "Africa"
    TPredicate::TPredicateNode N2(A2);  // Size == "big"
    TPredicate::TPredicateNode N4(A4);  // Location == "Australia"
    TPredicate::TPredicateNode N5(A5);  // Size == "medium"

    // 3) the two AND nodes and the OR root that joins them
    TPredicate::TPredicateNode N3(TPredicate::AND);
    N3.AddLeftChild(&N1);
    N3.AddRightChild(&N2);

    TPredicate::TPredicateNode N6(TPredicate::AND);
    N6.AddLeftChild(&N4);
    N6.AddRightChild(&N5);

    TPredicate::TPredicateNode N7(TPredicate::OR);
    N7.AddLeftChild(&N3);
    N7.AddRightChild(&N6);

    TPredicate Pred(&N7);
    Ts->Select(Pred);

    // Group T by Location, then by (Location, Size); GroupBy is extended
    // between the two calls, so their order matters.
    TStrV GroupBy;
    GroupBy.Add("Location");
    T->Group("LocationGroup", GroupBy);
    GroupBy.Add("Size");
    T->Group("LocationSizeGroup", GroupBy);

    T->Count("LocationCount", "Location");

    // Join T with the filtered table on the Location column.
    PTable Tj = T->Join("Location", *Ts, "Location");

    // Write out all three tables.
    T->SaveSS("animals_out_T.txt");
    Ts->SaveSS("animals_out_Ts.txt");
    Tj->SaveSS("animals_out_Tj.txt");
    return 0;
}
// Returns the current wall-clock time in seconds as reported by
// gettimeofday(): whole seconds plus the microsecond part as a fraction.
static double WallClockSecs() {
    timeval Now;
    gettimeofday(&Now, NULL);
    return Now.tv_sec + (Now.tv_usec/1000000.0);
}

// Calls getmaxcpumem() and prints the two values it fills in, followed by a
// blank line that separates the benchmark phases in the output.
static void PrintMaxCpuMem() {
    // Zero-initialized so the printf below never reads indeterminate values
    // should getmaxcpumem() leave either out-parameter untouched.
    float ft_max = 0.0f;
    float mu_max = 0.0f;
    getmaxcpumem(&ft_max, &mu_max);
    printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
    printf("\n");
}

// Benchmark driver for the tags table. Runs three timed phases and prints
// wall-clock time, table size and the getmaxcpumem() figures after each:
//   1. load the table from <TagsFnm>.tsv,
//   2. self-join it on the "UserId" column,
//   3. select the rows whose "Tag" column equals a fixed tag.
// With debug set, the small input file is used, the tag is "c#" instead of
// "java", and the join/select results are also saved next to the input.
// argc/argv are not used. Always returns 0.
int main(int argc, char** argv) {
    TBool debug = false;
    TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags";
    const char* SelectTag = "java";
    if(debug) {
        TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";
        SelectTag = "c#";
    }
    Schema TagS;
    TagS.Add(TPair<TStr,TAttrType>("UserId", atInt));
    TagS.Add(TPair<TStr,TAttrType>("Tag", atStr));
    TTableContext Context;
    TTable::SetMP(false);

    // Phase 1: load.
    double t1 = WallClockSecs();
    PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context);
    double t2 = WallClockSecs();
    printf("Time to load tags table: %f\n", t2 - t1);
    printf("Table Size:\n");
    Tags->PrintSize();
    Tags->PrintContextSize();
    PrintMaxCpuMem();

    // Phase 2: self-join on UserId. The start/finish progress lines are
    // printed inside the timed region, as before.
    t1 = WallClockSecs();
    printf("start join on user\n");
    PTable TagsJoinUser = Tags->SelfJoin("UserId");
    printf("finish join on user\n");
    t2 = WallClockSecs();
    printf("Time to join on user id column: %f\n", t2 - t1);
    printf("Table Size:\n");
    TagsJoinUser->PrintSize();
    if(debug) {
        TagsJoinUser->SaveSS(TagsFnm + "_join_user.tsv");
    }
    PrintMaxCpuMem();

    // Phase 3: select rows with Tag == SelectTag. Creating the destination
    // table is part of the timed region, as before.
    t1 = WallClockSecs();
    PTable JavaTags = TTable::New(TagS, Context);
    TIntV SelectedRows;
    Tags->SelectAtomicConst(TStr("Tag"), TStr(SelectTag), EQ, SelectedRows, JavaTags, false, true);
    t2 = WallClockSecs();
    // Report the tag that was actually selected (the label used to say
    // "java" even when debug mode selected "c#").
    printf("Time to select %s users: %f\n", SelectTag, t2 - t1);
    printf("Table Size:\n");
    JavaTags->PrintSize();
    if(debug) {
        JavaTags->SaveSS(TagsFnm + "_select.tsv");
    }
    PrintMaxCpuMem();
    return 0;
}
Example #20
0
// Wall-clock seconds elapsed between two gettimeofday() samples. The
// per-field differences are combined in double precision, so no mixed
// signed/unsigned integer arithmetic is involved.
static double ElapsedSecs(const struct timeval& from, const struct timeval& to) {
  return (to.tv_sec - from.tv_sec) + (to.tv_usec - from.tv_usec) / 1.e6;
}

// Times GetStrRowIdxByMap lookups on the "src" column of the wiki-vote
// table, once before and once after RequestIndexStrMap("src") is called.
// Each pass repeats the lookup 100 times for each of the values 7096, 7114
// and 0, then prints the three timings, the first returned row for the
// first two values, and every returned row for the value 0. The time taken
// by RequestIndexStrMap itself is printed between the two passes.
// The input path is hard-coded; all results go to stdout.
void test_strs() {

  TTableContext context;
  Schema schema;
  schema.Add(TPair<TStr,TAttrType>("src",atStr));
  schema.Add(TPair<TStr,TAttrType>("dst",atStr));
  TStr wikifilename = "/dfs/scratch0/viswa/wiki_Vote.txt";

  PTable wikitable = TTable::LoadSS(schema, wikifilename, &context, '\t', TBool(false));

  TIntV rowidx1, rowidx2, rowidx3;
  double delta1, delta2, delta3;
  struct timeval start, end;

  // pass 0: lookups before indexing; pass 1: request the index, then repeat
  // exactly the same lookups.
  for (int pass = 0; pass < 2; pass++) {
    if (pass == 0) {
      printf("Str: Before indexing ...\n");
    } else {
      gettimeofday(&start, NULL);
      wikitable->RequestIndexStrMap("src");
      gettimeofday(&end, NULL);
      printf("Str: Time to index: %f\n", ElapsedSecs(start, end));

      printf("Str: After indexing ...\n");
    }

    gettimeofday(&start, NULL);
    for (int i = 0 ; i < 100; i++) rowidx1 = wikitable->GetStrRowIdxByMap("src", 7096);
    gettimeofday(&end, NULL);
    delta1 = ElapsedSecs(start, end);

    gettimeofday(&start, NULL);
    for (int i = 0 ; i < 100; i++) rowidx2 = wikitable->GetStrRowIdxByMap("src", 7114);
    gettimeofday(&end, NULL);
    delta2 = ElapsedSecs(start, end);

    gettimeofday(&start, NULL);
    for (int i = 0 ; i < 100; i++) rowidx3 = wikitable->GetStrRowIdxByMap("src", 0);
    gettimeofday(&end, NULL);
    delta3 = ElapsedSecs(start, end);

    printf("Time 1: %f\tTime 2: %f\t Time 3: %f\n", delta1, delta2, delta3);

    // Only index element 0 when both lookups actually returned rows;
    // indexing an empty result vector would be out of bounds.
    if (rowidx1.Len() > 0 && rowidx2.Len() > 0) {
      printf("%d %d\n", rowidx1[0], rowidx2[0]);
    } else {
      printf("(no matching rows for at least one key)\n");
    }

    printf("Row Idx3: ");
    for (int i = 0 ; i < rowidx3.Len() ; i++ ) {
      printf("%d ", rowidx3[i]);
    }
    printf("\n");
  }

  // Disabled debugging aid: dumps row index and src/dst string-map values
  // for rows past the first 100000.
  /*
  int i = 0;
  for (TRowIterator RowI = wikitable->BegRI(); RowI < wikitable->EndRI(); RowI++) {
    if (i > 100000)
    printf("%d %d %d\n", RowI.GetRowIdx(), RowI.GetStrMapByName("src"), RowI.GetStrMapByName("dst"));
    i++;
//    if (i > 10) break;
  }
  */
}