void TTable::AddNodeAttributes(PNEAGraph& Graph, THash<TFlt, TInt>& FSrNodeMap, THash<TFlt, TInt>& FDsNodeMap) { for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) { // Add Source and Destination node attributes. for (int i = 0; i < SrcNodeAttrV.Len(); i++) { TStr SrcColAttr = SrcNodeAttrV[i]; TYPE CT = GetColType(SrcColAttr); int Idx = GetColIdx(SrcColAttr); TInt RowIdx = RowI.GetRowIdx(); if (CT == INT) { Graph->AddIntAttrDatN(GetNId(SrcCol, RowIdx, FSrNodeMap, FDsNodeMap), IntCols[Idx][RowIdx], SrcColAttr); } else if (CT == FLT) { Graph->AddFltAttrDatN(GetNId(SrcCol, RowIdx, FSrNodeMap, FDsNodeMap), FltCols[Idx][RowIdx], SrcColAttr); } else { Graph->AddStrAttrDatN(GetNId(SrcCol, RowIdx, FSrNodeMap, FDsNodeMap), StrColVals.GetStr(StrColMaps[Idx][RowIdx]), SrcColAttr); } } for (int i = 0; i < DstNodeAttrV.Len(); i++) { TStr DstColAttr = DstNodeAttrV[i]; TYPE CT = GetColType(DstColAttr); int Idx = GetColIdx(DstColAttr); TInt RowIdx = RowI.GetRowIdx(); if (CT == INT) { Graph->AddIntAttrDatN(GetNId(SrcCol, RowIdx, FSrNodeMap, FDsNodeMap), IntCols[Idx][RowIdx], DstColAttr); } else if (CT == FLT) { Graph->AddFltAttrDatN(GetNId(SrcCol, RowIdx, FSrNodeMap, FDsNodeMap), FltCols[Idx][RowIdx], DstColAttr); } else { Graph->AddStrAttrDatN(GetNId(SrcCol, RowIdx, FSrNodeMap, FDsNodeMap), StrColVals.GetStr(StrColMaps[Idx][RowIdx]), DstColAttr); } } } }
// Writes every column listed in EdgeAttrV onto the edges of Graph.
//
// Each valid row contributes one value per attribute column; the row index is
// passed as the first argument of the Add*AttrDatE calls (presumably the edge
// id assigned when the topology was built - confirm against the caller).
// Columns whose type is not INT, FLT or STR are skipped.
void TTable::AddEdgeAttributes(PNEAGraph& Graph) {
  for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
    const TInt EdgeId = RowI.GetRowIdx();
    for (int AttrN = 0; AttrN < EdgeAttrV.Len(); AttrN++) {
      TStr AttrName = EdgeAttrV[AttrN];
      const TYPE AttrType = GetColType(AttrName);
      const TInt ColIdx = GetColIdx(AttrName);
      if (AttrType == INT) {
        TInt IntVal = IntCols[ColIdx][EdgeId];
        Graph->AddIntAttrDatE(EdgeId, IntVal, AttrName);
      } else if (AttrType == FLT) {
        TFlt FltVal = FltCols[ColIdx][EdgeId];
        Graph->AddFltAttrDatE(EdgeId, FltVal, AttrName);
      } else if (AttrType == STR) {
        // String cells hold an index into the shared string pool.
        TStr StrVal = StrColVals.GetStr(StrColMaps[ColIdx][EdgeId]);
        Graph->AddStrAttrDatE(EdgeId, StrVal, AttrName);
      }
    }
  }
}
// TODO: simplify using TRowIteratorWithRemove void TTable::KeepSortedRows(const TIntV& KeepV){ // need to remove first rows while(FirstValidRow != KeepV[0]){ RemoveRow(FirstValidRow);} // at this point we know the first row will stay - i.e. FirstValidRow == KeepV[0] TInt KeepIdx = 1; TRowIterator RowI = BegRI(); while(RowI < EndRI()){ if(KeepV.Len() > KeepIdx){ if(KeepV[KeepIdx] == Next[RowI.GetRowIdx()]){ KeepIdx++; RowI++; } else{ RemoveRow(Next[RowI.GetRowIdx()]); } // covered all of KeepV. remove the rest of the rows // current RowI.CurrRowIdx is the last element of KeepV } else{ while(Next[RowI.GetRowIdx()] != Last){ RemoveRow(Next[RowI.GetRowIdx()]); } // removed the rest of the rows. increment RowI to EndRI RowI++; } } }
/////////////////////////////////////////////////////////////////////////////// // Initialization and helper methods for TempMotifCounter TempMotifCounter::TempMotifCounter(const TStr& filename) { // First load the static graph static_graph_ = TSnap::LoadEdgeList<PNGraph>(filename, 0, 1); int max_nodes = static_graph_->GetMxNId(); temporal_data_ = TVec< THash<TInt, TIntV> >(max_nodes); // Formulate input File Format: // source_node destination_node timestamp TTableContext context; Schema temp_graph_schema; temp_graph_schema.Add(TPair<TStr,TAttrType>("source", atInt)); temp_graph_schema.Add(TPair<TStr,TAttrType>("destination", atInt)); temp_graph_schema.Add(TPair<TStr,TAttrType>("time", atInt)); // Load the temporal graph PTable data_ptr = TTable::LoadSS(temp_graph_schema, filename, &context, ' '); TInt src_idx = data_ptr->GetColIdx("source"); TInt dst_idx = data_ptr->GetColIdx("destination"); TInt tim_idx = data_ptr->GetColIdx("time"); for (TRowIterator RI = data_ptr->BegRI(); RI < data_ptr->EndRI(); RI++) { TInt row_idx = RI.GetRowIdx(); int src = data_ptr->GetIntValAtRowIdx(src_idx, row_idx).Val; int dst = data_ptr->GetIntValAtRowIdx(dst_idx, row_idx).Val; int tim = data_ptr->GetIntValAtRowIdx(tim_idx, row_idx).Val; temporal_data_[src](dst).Add(tim); } }
// Adds an integer column CountColName holding, for each valid row, the number
// of valid rows that share that row's value in column Col.
//
// CountColName - name of the new count column.
// Col          - existing column to count by; throws "no such column" when
//                it is not present in ColTypeMap.
void TTable::Count(TStr CountColName, TStr Col){
  if (!ColTypeMap.IsKey(Col)) {
    TExcept::Throw("no such column " + Col);
  }

  TIntV Counts(NumRows);
  const TYPE ColType = GetColType(Col);

  if (ColType == INT) {
    // The number of distinct values is unknown, so the hash is not pre-sized.
    THash<TInt,TIntV> Groups;
    TIntV& Vals = IntCols[GetColIdx(Col)];
    GroupByIntCol(Col, Groups, TIntV(0), true);
    for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
      const TInt RowIdx = RowI.GetRowIdx();
      Counts[RowIdx] = Groups.GetDat(Vals[RowIdx]).Len();
    }
  } else if (ColType == FLT) {
    THash<TFlt,TIntV> Groups;
    TFltV& Vals = FltCols[GetColIdx(Col)];
    GroupByFltCol(Col, Groups, TIntV(0), true);
    for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
      const TInt RowIdx = RowI.GetRowIdx();
      Counts[RowIdx] = Groups.GetDat(Vals[RowIdx]).Len();
    }
  } else if (ColType == STR) {
    THash<TStr,TIntV> Groups;
    GroupByStrCol(Col, Groups, TIntV(0), true);
    for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
      const TInt RowIdx = RowI.GetRowIdx();
      Counts[RowIdx] = Groups.GetDat(GetStrVal(Col, RowIdx)).Len();
    }
  }

  // Register the new column: data, schema entry, and name -> (type, index).
  IntCols.Add(Counts);
  AddSchemaCol(CountColName, INT);
  ColTypeMap.AddDat(CountColName, TPair<TYPE,TInt>(INT, IntCols.Len()-1));
}
// Groups row indices by their value in the string column GroupBy.
//
// GroupBy  - name of a STR column; throws if it is missing or not a string.
// Grouping - updated through UpdateGrouping with (value, row index) pairs.
// IndexSet - candidate row indices, consulted only when All is false;
//            entries that are not valid rows are ignored.
// All      - when true, every valid row of the table is grouped.
void TTable::GroupByStrCol(TStr GroupBy, THash<TStr,TIntV>& Grouping, const TIntV& IndexSet, TBool All) const{
  if (!ColTypeMap.IsKey(GroupBy)) {
    TExcept::Throw("no such column " + GroupBy);
  }
  if (GetColType(GroupBy) != STR) {
    TExcept::Throw(GroupBy + " values are not of expected type string");
  }

  if (All) {
    // Common and most expensive case: walk all valid rows via the iterator.
    for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
      UpdateGrouping<TStr>(Grouping, RowI.GetStrAttr(GroupBy), RowI.GetRowIdx());
    }
    return;
  }

  // Restricted case: only the rows named in IndexSet.
  const int NumCandidates = IndexSet.Len();
  for (int N = 0; N < NumCandidates; N++) {
    const TInt RowIdx = IndexSet[N];
    if (!IsRowValid(RowIdx)) {
      continue;
    }
    UpdateGrouping<TStr>(Grouping, GetStrVal(GroupBy, RowIdx), RowIdx);
  }
}
// Groups row indices by their value in the float column GroupBy.
//
// GroupBy  - name of a FLT column; throws if it is missing or not a float.
// grouping - updated through UpdateGrouping with (value, row index) pairs.
// IndexSet - candidate row indices, consulted only when All is false;
//            entries that are not valid rows are ignored.
// All      - when true, every valid row of the table is grouped.
void TTable::GroupByFltCol(TStr GroupBy, THash<TFlt,TIntV>& grouping, const TIntV& IndexSet, TBool All) const{
  if (!ColTypeMap.IsKey(GroupBy)) {
    TExcept::Throw("no such column " + GroupBy);
  }
  if (GetColType(GroupBy) != FLT) {
    TExcept::Throw(GroupBy + " values are not of expected type float");
  }

  if (All) {
    // Common and most expensive case: walk all valid rows via the iterator.
    for (TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
      UpdateGrouping<TFlt>(grouping, RowI.GetFltAttr(GroupBy), RowI.GetRowIdx());
    }
    return;
  }

  // Restricted case: only the rows named in IndexSet, read straight from the
  // column storage.
  const TFltV& Vals = FltCols[GetColIdx(GroupBy)];
  const int NumCandidates = IndexSet.Len();
  for (int N = 0; N < NumCandidates; N++) {
    const TInt RowIdx = IndexSet[N];
    if (!IsRowValid(RowIdx)) {
      continue;
    }
    UpdateGrouping<TFlt>(grouping, Vals[RowIdx], RowIdx);
  }
}
void TTable::SaveSS(const TStr& OutFNm){ FILE* F = fopen(OutFNm.CStr(), "w"); // debug if(F == NULL){ printf("failed to open file %s\n", OutFNm.CStr()); perror("fail "); return; } TInt L = S.Len(); // print title (schema) for(TInt i = 0; i < L-1; i++){ fprintf(F, "%s\t", GetSchemaColName(i).CStr()); } fprintf(F, "%s\n", GetSchemaColName(L-1).CStr()); // print table contents for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++){ for(TInt i = 0; i < L; i++){ char C = (i == L-1) ? '\n' : '\t'; switch(GetSchemaColType(i)){ case INT:{ fprintf(F, "%d%c", RowI.GetIntAttr(GetSchemaColName(i)).Val, C); break; } case FLT:{ fprintf(F, "%f%c", RowI.GetFltAttr(GetSchemaColName(i)).Val, C); break; } case STR:{ fprintf(F, "%s%c", RowI.GetStrAttr(GetSchemaColName(i)).CStr(), C); break; } } } } fclose(F); }
// wrong reading of string attributes void TTable::BuildGraphTopology(PNEAGraph& Graph, THash<TFlt, TInt>& FSrNodeMap, THash<TFlt, TInt>& FDsNodeMap) { TYPE SrCT = GetColType(SrcCol); TInt SrIdx = GetColIdx(SrcCol); TYPE DsCT = GetColType(DstCol); TInt DsIdx = GetColIdx(DstCol); TInt SrcCnt = 0; TInt DstCnt = 0; for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) { if (SrCT == INT && DsCT == INT) { Graph->AddNode(IntCols[SrIdx][RowI.GetRowIdx()]); Graph->AddNode(IntCols[DsIdx][RowI.GetRowIdx()]); Graph->AddEdge(IntCols[SrIdx][RowI.GetRowIdx()], IntCols[DsIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == INT && DsCT == FLT) { Graph->AddNode(IntCols[SrIdx][RowI.GetRowIdx()]); TFlt val = FltCols[DsIdx][RowI.GetRowIdx()]; if (!FDsNodeMap.IsKey(val)) { FDsNodeMap.AddDat(val, DstCnt++); } Graph->AddNode(FDsNodeMap.GetDat(val)); Graph->AddEdge(IntCols[SrIdx][RowI.GetRowIdx()], FDsNodeMap.GetDat(val)); } else if (SrCT == INT && DsCT == STR) { Graph->AddNode(IntCols[SrIdx][RowI.GetRowIdx()]); Graph->AddNode(StrColMaps[DsIdx][RowI.GetRowIdx()]); Graph->AddEdge(IntCols[SrIdx][RowI.GetRowIdx()], StrColMaps[DsIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == FLT && DsCT == INT) { Graph->AddNode(IntCols[DsIdx][RowI.GetRowIdx()]); TFlt val = FltCols[SrIdx][RowI.GetRowIdx()]; if (!FSrNodeMap.IsKey(val)) { FSrNodeMap.AddDat(val, SrcCnt++); } Graph->AddNode(FSrNodeMap.GetDat(val)); Graph->AddEdge(FSrNodeMap.GetDat(val), IntCols[SrIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == FLT && DsCT == STR) { Graph->AddNode(StrColMaps[DsIdx][RowI.GetRowIdx()]); TFlt val = FltCols[SrIdx][RowI.GetRowIdx()]; if (!FSrNodeMap.IsKey(val)) { FSrNodeMap.AddDat(val, SrcCnt++); } Graph->AddNode(FSrNodeMap.GetDat(val)); Graph->AddEdge(FSrNodeMap.GetDat(val), IntCols[SrIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == FLT && DsCT == FLT) { TFlt val = FltCols[SrIdx][RowI.GetRowIdx()]; if (!FSrNodeMap.IsKey(val)) { FSrNodeMap.AddDat(val, SrcCnt++); } 
Graph->AddNode(FSrNodeMap.GetDat(val)); val = FltCols[DsIdx][RowI.GetRowIdx()]; if (!FDsNodeMap.IsKey(val)) { FDsNodeMap.AddDat(val, DstCnt++); } Graph->AddNode(FDsNodeMap.GetDat(val)); Graph->AddEdge(FSrNodeMap.GetDat(val), FDsNodeMap.GetDat(val), RowI.GetRowIdx()); } } }
// Q: Do we want to have any gurantees in terms of order of the 0t rows - i.e. // ordered by "this" table row idx as primary key and "Table" row idx as secondary key // This means only keeping joint row indices (pairs of original row indices), sorting them // and adding all rows in the end. Sorting can be expensive, but we would be able to pre-allocate // memory for the joint table.. PTable TTable::Join(TStr Col1, const TTable& Table, TStr Col2) { if(!ColTypeMap.IsKey(Col1)){ TExcept::Throw("no such column " + Col1); } if(!ColTypeMap.IsKey(Col2)){ TExcept::Throw("no such column " + Col2); } if (GetColType(Col1) != GetColType(Col2)) { TExcept::Throw("Trying to Join on columns of different type"); } // initialize result table PTable JointTable = InitializeJointTable(Table); // hash smaller table (group by column) TYPE ColType = GetColType(Col1); TBool ThisIsSmaller = (NumValidRows <= Table.NumValidRows); const TTable& TS = ThisIsSmaller ? *this : Table; const TTable& TB = ThisIsSmaller ? Table : *this; TStr ColS = ThisIsSmaller ? Col1 : Col2; TStr ColB = ThisIsSmaller ? Col2 : Col1; // iterate over the rows of the bigger table and check for "collisions" // with the group keys for the small table. 
switch(ColType){ case INT:{ THash<TInt, TIntV> T; TS.GroupByIntCol(Col1, T, TIntV(), true); for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){ TInt K = RowI.GetIntAttr(ColB); if(T.IsKey(K)){ TIntV& Group = T.GetDat(K); for(TInt i = 0; i < Group.Len(); i++){ if(ThisIsSmaller){ JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx()); } else{ JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]); } } } } break; } case FLT:{ THash<TFlt, TIntV> T; TS.GroupByFltCol(Col1, T, TIntV(), true); for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){ TFlt K = RowI.GetFltAttr(ColB); if(T.IsKey(K)){ TIntV& Group = T.GetDat(K); for(TInt i = 0; i < Group.Len(); i++){ if(ThisIsSmaller){ JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx()); } else{ JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]); } } } } break; } case STR:{ THash<TStr, TIntV> T; TS.GroupByStrCol(Col1, T, TIntV(), true); for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){ TStr K = RowI.GetStrAttr(ColB); if(T.IsKey(K)){ TIntV& Group = T.GetDat(K); for(TInt i = 0; i < Group.Len(); i++){ if(ThisIsSmaller){ JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx()); } else{ JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]); } } } } } break; } return JointTable; }
/** * Used for benchmarking sorting by source algorithm. * Takes as input starting point of * a top cascade and outputs time taken for casacade detection. * Input : Source, Dest, Start, Duration * Output : Prints the time for cascade detection */ int main(int argc,char* argv[]) { TTableContext Context; Schema TimeS; TimeS.Add(TPair<TStr,TAttrType>("Source",atInt)); TimeS.Add(TPair<TStr,TAttrType>("Dest",atInt)); TimeS.Add(TPair<TStr,TAttrType>("Start",atInt)); TimeS.Add(TPair<TStr,TAttrType>("Duration",atInt)); PTable P1 = TTable::LoadSS(TimeS,"./../../../../datasets/temporal/yemen_call_201001.txt",&Context,' '); TIntV MapV; TStrV SortBy; SortBy.Add("Source"); P1->Order(SortBy); TIntV Source; // Sorted vec of start time P1->ReadIntCol("Source",Source); for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) { MapV.Add(RI.GetRowIdx()); } // Attribute to Int mapping TInt SIdx = P1->GetColIdx("Source"); TInt DIdx = P1->GetColIdx("Dest"); TInt StIdx = P1->GetColIdx("Start"); TInt DuIdx = P1->GetColIdx("Duration"); int W = atoi(argv[1]); int len = 0; // Find the starting point int TSource = atoi(argv[2]); int TDest = atoi(argv[3]); int TStart = atoi(argv[4]); int TDur = atoi(argv[5]); TInt RIdx; for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) { RIdx = RI.GetRowIdx(); int RSource = P1->GetIntValAtRowIdx(SIdx,RIdx).Val; int RDest = P1->GetIntValAtRowIdx(DIdx,RIdx).Val; int RStart = P1->GetIntValAtRowIdx(StIdx,RIdx).Val; int RDur = P1->GetIntValAtRowIdx(DuIdx,RIdx).Val; if (TSource == RSource && TDest == RDest && TStart == RStart && TDur == RDur) break; } // Start building the cascade from the start point clock_t st,et; st = clock(); for (int i = 0; i < 1; i++) { THashSet<TInt> VisitedH; TSnapQueue<TInt> EventQ; EventQ.Push(RIdx); VisitedH.AddKey(RIdx); while (!EventQ.Empty()) { TInt CIdx = EventQ.Top(); EventQ.Pop(); int CDest = P1->GetIntValAtRowIdx(DIdx,CIdx).Val; int CStart = P1->GetIntValAtRowIdx(StIdx,CIdx).Val; int CDur = 
P1->GetIntValAtRowIdx(DuIdx,CIdx).Val; // In line binary search int val = CDest; int lo = 0; int hi = Source.Len() - 1; int index = -1; while (hi >= lo) { int mid = lo + (hi - lo)/2; if (Source.GetVal(mid) > val) { hi = mid - 1;} else if (Source.GetVal(mid) < val) { lo = mid + 1;} else { index = mid; hi = mid - 1;} } // End of binary search int BIdx = index; for(int i = BIdx; i < Source.Len(); i++) { int PId = MapV.GetVal(i).Val; if (! VisitedH.IsKey(PId)) { int TSource = P1->GetIntValAtRowIdx(SIdx,PId).Val; int TStart = P1->GetIntValAtRowIdx(StIdx,PId).Val; if (TSource != CDest) { break; } if (TStart >= (CDur + CStart) && TStart - (CDur + CStart) <= W) { VisitedH.AddKey(PId); EventQ.Push(PId); } } } } len = VisitedH.Len(); } et = clock(); float diff = ((float) et - (float) st)/CLOCKS_PER_SEC; printf("Size %d,Time %f\n",len,diff); return 0; }