int TMultimodalGraphImplB::GetSubGraphMocked(const TIntV ModeIds) const { int NumVerticesAndEdges = 0; for (THash<TInt,TInt>::TIter CurI = NodeToModeMapping.BegI(); CurI < NodeToModeMapping.EndI(); CurI++) { if (ModeIds.IsIn(CurI.GetDat())) { NumVerticesAndEdges++; } } for (int ModeIdx1 = 0; ModeIdx1 < ModeIds.Len(); ModeIdx1++) { int ModeId1 = ModeIds.GetVal(ModeIdx1); for (int ModeIdx2 = 0; ModeIdx2 < ModeIds.Len(); ModeIdx2++) { int ModeId2 = ModeIds.GetVal(ModeIdx2); TPair<TInt,TInt> ModeIdsKey = GetModeIdsKey(ModeId1, ModeId2); if (!Graphs.IsKey(ModeIdsKey)) { continue; } const TNGraph& Graph = Graphs.GetDat(ModeIdsKey); for (TNGraph::TNodeI it = Graph.BegNI(); it < Graph.EndNI(); it++) { for (int e = 0; e < it.GetOutDeg(); e++) { NumVerticesAndEdges += it.GetOutNId(e); } } } } return NumVerticesAndEdges; }
// Builds the induced sub-network containing exactly the nodes whose mode
// appears in ModeIds, plus every edge stored in any pairwise graph keyed by
// an ordered pair of modes drawn from that list. Prints node/edge counts of
// the result before returning it.
TIntNNet TMultimodalGraphImplB::GetSubGraph(const TIntV ModeIds) const {
  TIntNNet Result = TIntNNet();
  // Pass 1: copy over every node that lives in one of the wanted modes.
  for (THash<TInt,TInt>::TIter MapIt = NodeToModeMapping.BegI(); MapIt < NodeToModeMapping.EndI(); MapIt++) {
    const bool KeepNode = ModeIds.IsIn(MapIt.GetDat());
    if (KeepNode) {
      Result.AddNode(MapIt.GetKey(), MapIt.GetDat());
    }
  }
  // Pass 2: for each ordered (mode, mode) pair, replay the edges of the
  // corresponding pairwise graph -- when one exists -- into the result.
  for (int SrcIdx = 0; SrcIdx < ModeIds.Len(); SrcIdx++) {
    for (int DstIdx = 0; DstIdx < ModeIds.Len(); DstIdx++) {
      const TPair<TInt,TInt> PairKey = GetModeIdsKey(ModeIds.GetVal(SrcIdx), ModeIds.GetVal(DstIdx));
      if (!Graphs.IsKey(PairKey)) {
        continue;
      }
      const TNGraph& PairGraph = Graphs.GetDat(PairKey);
      for (TNGraph::TNodeI NI = PairGraph.BegNI(); NI < PairGraph.EndNI(); NI++) {
        for (int EdgeN = 0; EdgeN < NI.GetOutDeg(); EdgeN++) {
          Result.AddEdge(NI.GetId(), NI.GetOutNId(EdgeN));
        }
      }
    }
  }
  printf("Number of nodes in SubGraph: %d...\n", Result.GetNodes());
  printf("Number of edges in SubGraph: %d...\n", Result.GetEdges());
  return Result;
}
// Generates one cascade via GenCascade() and then injects noise into it in
// two optional stages: (1) randomly REMOVE ~PercRndRemoval of the hit nodes,
// and (2) randomly SUBSTITUTE ~PercRndNodes of the hit nodes with a fresh
// random graph node at a random time inside the cascade's [tbeg, tend] span.
// Edges whose usage count drops to zero because of the noise are dropped from
// the per-cascade edge list; the surviving counts are merged into EdgesUsed.
//  C              -- cascade to generate and perturb (in/out)
//  TModel, window -- forwarded to GenCascade()
//  EdgesUsed      -- aggregate edge-usage histogram across cascades (in/out)
//  std_waiting_time, std_beta -- noise parameters forwarded to GenCascade()
//  PercRndNodes   -- fraction of nodes to substitute (0 disables stage 2)
//  PercRndRemoval -- fraction of nodes to remove (0 disables stage 1)
void TNetInfBs::GenNoisyCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed, const double& std_waiting_time, const double& std_beta, const double& PercRndNodes, const double& PercRndRemoval) {
  TIntPrIntH EdgesUsedC; // list of used edges for a single cascade
  // NOTE(review): 'delta' is not a parameter of this function -- presumably a
  // class member of TNetInfBs; confirm against the header.
  GenCascade(C, TModel, window, EdgesUsedC, delta, std_waiting_time, std_beta);
  // store keys
  TIntV KeyV; C.NIdHitH.GetKeyV(KeyV);
  // store first and last time
  double tbeg = TFlt::Mx, tend = TFlt::Mn;
  for (int i=0; i < KeyV.Len(); i++) {
    if (tbeg > C.NIdHitH.GetDat(KeyV[i]).Tm) tbeg = C.NIdHitH.GetDat(KeyV[i]).Tm;
    if (tend < C.NIdHitH.GetDat(KeyV[i]).Tm) tend = C.NIdHitH.GetDat(KeyV[i]).Tm;
  }
  // remove PercRndRemoval% of the nodes of the cascades
  if (PercRndRemoval > 0) {
    // iterate backwards so deletions do not disturb unvisited indices
    for (int i=KeyV.Len()-1; i >= 0; i--) {
      if (TFlt::Rnd.GetUniDev() < PercRndRemoval) {
        // remove from the EdgesUsedC the ones affected by the removal
        TIntPrV EdgesToRemove;
        for (TIntPrIntH::TIter EI = EdgesUsedC.BegI(); EI < EdgesUsedC.EndI(); EI++) {
          // an edge is affected when the removed node is its time-consistent
          // endpoint: earlier than the other end if it is Val1 (source side),
          // later if it is Val2 (destination side)
          if ( (KeyV[i]==EI.GetKey().Val1 && C.IsNode(EI.GetKey().Val2) && C.GetTm(KeyV[i]) < C.GetTm(EI.GetKey().Val2)) ||
               (KeyV[i]==EI.GetKey().Val2 && C.IsNode(EI.GetKey().Val1) && C.GetTm(KeyV[i]) > C.GetTm(EI.GetKey().Val1)) ) {
            EI.GetDat() = EI.GetDat()-1;
            // count reached zero: schedule the key for deletion (cannot
            // delete while iterating the hash table)
            if (EI.GetDat()==0) EdgesToRemove.Add(EI.GetKey());
          }
        }
        for (int er=0; er<EdgesToRemove.Len(); er++) EdgesUsedC.DelKey(EdgesToRemove[er]);
        C.Del(KeyV[i]);
      }
    }
    // defrag the hash table, otherwise other functions can crash
    C.NIdHitH.Defrag();
  }
  // Substitute PercRndNodes% of the nodes for a random node at a random time
  if (PercRndNodes > 0) {
    for (int i=KeyV.Len()-1; i >= 0; i--) {
      if (TFlt::Rnd.GetUniDev() < PercRndNodes) {
        // remove from the EdgesUsedC the ones affected by the change
        TIntPrV EdgesToRemove;
        for (TIntPrIntH::TIter EI = EdgesUsedC.BegI(); EI < EdgesUsedC.EndI(); EI++) {
          // same time-consistency test as in the removal stage above
          if ( (KeyV[i]==EI.GetKey().Val1 && C.IsNode(EI.GetKey().Val2) && C.GetTm(KeyV[i]) < C.GetTm(EI.GetKey().Val2)) ||
               (KeyV[i]==EI.GetKey().Val2 && C.IsNode(EI.GetKey().Val1) && C.GetTm(KeyV[i]) > C.GetTm(EI.GetKey().Val1)) ) {
            EI.GetDat() = EI.GetDat()-1;
            if (EI.GetDat()==0) EdgesToRemove.Add(EI.GetKey());
          }
        }
        for (int er=0; er<EdgesToRemove.Len(); er++) EdgesUsedC.DelKey(EdgesToRemove[er]);
        printf("Old node n:%d t:%f --", KeyV[i].Val, C.GetTm(KeyV[i]));
        C.Del(KeyV[i]);
        // not repeating a label
        double tnew = 0; int keynew = -1;
        // draw (node, time) pairs until the node is not already in the cascade
        do {
          tnew = tbeg + TFlt::Rnd.GetUniDev()*(tend-tbeg);
          keynew = Graph->GetRndNId();
        } while (KeyV.IsIn(keynew));
        printf("New node n:%d t:%f\n", keynew, tnew);
        C.Add(keynew, tnew);
        // record the substitute so later draws cannot pick it again
        KeyV.Add(keynew);
      }
    }
  }
  // add to the aggregate list (EdgesUsed)
  EdgesUsedC.Defrag();
  for (int i=0; i<EdgesUsedC.Len(); i++) {
    if (!EdgesUsed.IsKey(EdgesUsedC.GetKey(i))) EdgesUsed.AddDat(EdgesUsedC.GetKey(i)) = 0;
    EdgesUsed.GetDat(EdgesUsedC.GetKey(i)) += 1;
  }
}
PBowDocBs TFtrGenBs::LoadCsv(TStr& FNm, const int& ClassId, const TIntV& IgnoreIdV, const int& TrainLen) { // feature generators PFtrGenBs FtrGenBs = TFtrGenBs::New(); // CSV parsing stuff PSIn SIn = TFIn::New(FNm); char SsCh = ' '; TStrV FldValV; // read the headers and initialise the feature generators TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false); for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) { const TStr& FldVal = FldValV[FldValN]; if (FldValN == ClassId) { if (FldVal == "NOM") { FtrGenBs->PutClsFtrGen(TFtrGenNominal::New()); } else if (FldVal == "MULTI-NOM") { FtrGenBs->PutClsFtrGen(TFtrGenMultiNom::New()); } else { TExcept::Throw("Wrong class type '" + FldVal + "', should be NOM or MULTI-NOM!"); } } else if (!IgnoreIdV.IsIn(FldValN)) { if (FldVal == TFtrGenNumeric::GetType()) { FtrGenBs->AddFtrGen(TFtrGenNumeric::New()); } else if (FldVal == TFtrGenNominal::GetType()) { FtrGenBs->AddFtrGen(TFtrGenNominal::New()); } else if (FldVal == TFtrGenToken::GetType()) { FtrGenBs->AddFtrGen(TFtrGenToken::New( TSwSet::New(swstNone), TStemmer::New(stmtNone))); } else if (FldVal == TFtrGenSparseNumeric::GetType()) { FtrGenBs->AddFtrGen(TFtrGenSparseNumeric::New()); } else if (FldVal == TFtrGenMultiNom::GetType()) { FtrGenBs->AddFtrGen(TFtrGenMultiNom::New()); } else { TExcept::Throw("Wrong type '" + FldVal + "'!"); } } } const int Flds = FldValV.Len(); // read the lines and feed them to the feature generators int Recs = 0; while (!SIn->Eof()) { if (Recs == TrainLen) { break; } Recs++; printf("%7d\r", Recs); TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false); // make sure line still has the same number of fields as the header EAssertR(FldValV.Len() == Flds, TStr::Fmt("Wrong number of fields in line %d! 
Found %d and expected %d!", Recs + 1, FldValV.Len(), Flds)); // go over lines try { TStrV FtrValV; for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) { const TStr& FldVal = FldValV[FldValN]; if (FldValN == ClassId) { FtrGenBs->UpdateCls(FldVal); } else if (!IgnoreIdV.IsIn(FldValN)) { FtrValV.Add(FldVal); } } FtrGenBs->Update(FtrValV); } catch (PExcept Ex) { TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!", Recs+1, Ex->GetMsgStr().CStr())); } } // read the file again and feed it to the training set PBowDocBs BowDocBs = FtrGenBs->MakeBowDocBs(); // we read and ignore the headers since we parsed them already SIn = TFIn::New(FNm); SsCh = ' '; TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false); // read the lines and feed them to the training set Recs = 0; while (!SIn->Eof()){ Recs++; printf("%7d\r", Recs); TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false); // make sure line still has the same number of fields as the header EAssertR(FldValV.Len() == Flds, TStr::Fmt("Wrong number of fields in line %s! Found %d and expected %d!", Recs + 1, FldValV.Len(), Flds)); // go over lines and construct the sparse vector TStrV FtrValV; TStr ClsFtrVal; try { for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) { const TStr& FldVal = FldValV[FldValN]; if (FldValN == ClassId) { ClsFtrVal = FldVal; } else if (!IgnoreIdV.IsIn(FldValN)) { FtrValV.Add(FldVal); } } } catch (PExcept Ex) { TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!", Recs+1, Ex->GetMsgStr().CStr())); } // add the feature vector to trainsets FtrGenBs->AddBowDoc(BowDocBs, TStr::Fmt("Line-%d", Recs), FtrValV, ClsFtrVal); } // prepare training and testing doc ids TIntV AllDIdV; BowDocBs->GetAllDIdV(AllDIdV); IAssert(AllDIdV.IsSorted()); TIntV TrainDIdV = AllDIdV; TrainDIdV.Trunc(TrainLen); BowDocBs->PutTrainDIdV(TrainDIdV); TIntV TestDIdV = AllDIdV; TestDIdV.Minus(TrainDIdV); BowDocBs->PutTestDIdV(TestDIdV); return BowDocBs; }