int TMultimodalGraphImplB::GetSubGraphMocked(const TIntV ModeIds) const {
  int NumVerticesAndEdges = 0;

  for (THash<TInt,TInt>::TIter CurI = NodeToModeMapping.BegI(); CurI < NodeToModeMapping.EndI(); CurI++) {
    if (ModeIds.IsIn(CurI.GetDat())) {
      NumVerticesAndEdges++;
    }
  }

  for (int ModeIdx1 = 0; ModeIdx1 < ModeIds.Len(); ModeIdx1++) {
    int ModeId1 = ModeIds.GetVal(ModeIdx1);
    for (int ModeIdx2 = 0; ModeIdx2 < ModeIds.Len(); ModeIdx2++) {
      int ModeId2 = ModeIds.GetVal(ModeIdx2);
      TPair<TInt,TInt> ModeIdsKey = GetModeIdsKey(ModeId1, ModeId2);
      if (!Graphs.IsKey(ModeIdsKey)) { continue; }
      const TNGraph& Graph = Graphs.GetDat(ModeIdsKey);
      for (TNGraph::TNodeI it = Graph.BegNI(); it < Graph.EndNI(); it++) {
        for (int e = 0; e < it.GetOutDeg(); e++) {
          NumVerticesAndEdges += it.GetOutNId(e);
        }
      }
    }
  }

  return NumVerticesAndEdges;
}
// Builds and returns a TIntNNet restricted to the modes listed in ModeIds.
//   * Every entry of NodeToModeMapping whose mode is in ModeIds is added as a
//     node, with the node id as key and its mode id as node data.
//   * For every pair of modes taken from ModeIds that has a graph stored in
//     Graphs, each out-edge of that graph is added to the result.
// The node and edge counts of the result are printed to stdout before
// returning.
//
// NOTE(review): every ordered pair of entries of ModeIds is looked up,
// including a mode paired with itself. If GetModeIdsKey() is insensitive to
// argument order, each cross-mode graph is traversed twice -- verify.
TIntNNet TMultimodalGraphImplB::GetSubGraph(const TIntV ModeIds) const {
  TIntNNet Result;

  // Nodes first, so that the edges added below have both endpoints present.
  THash<TInt,TInt>::TIter NodeModeI = NodeToModeMapping.BegI();
  while (NodeModeI < NodeToModeMapping.EndI()) {
    if (ModeIds.IsIn(NodeModeI.GetDat())) {
      Result.AddNode(NodeModeI.GetKey(), NodeModeI.GetDat());
    }
    NodeModeI++;
  }

  // Edges: copy every out-edge of each graph registered for a requested pair.
  const int NumModes = ModeIds.Len();
  for (int SrcIdx = 0; SrcIdx < NumModes; SrcIdx++) {
    int SrcMode = ModeIds.GetVal(SrcIdx);
    for (int DstIdx = 0; DstIdx < NumModes; DstIdx++) {
      int DstMode = ModeIds.GetVal(DstIdx);
      TPair<TInt,TInt> PairKey = GetModeIdsKey(SrcMode, DstMode);
      if (Graphs.IsKey(PairKey)) {
        const TNGraph& PairGraph = Graphs.GetDat(PairKey);
        TNGraph::TNodeI NI = PairGraph.BegNI();
        while (NI < PairGraph.EndNI()) {
          const int OutDeg = NI.GetOutDeg();
          for (int OutN = 0; OutN < OutDeg; OutN++) {
            Result.AddEdge(NI.GetId(), NI.GetOutNId(OutN));
          }
          NI++;
        }
      }
    }
  }

  printf("Number of nodes in SubGraph: %d...\n", Result.GetNodes());
  printf("Number of edges in SubGraph: %d...\n", Result.GetEdges());

  return Result;
}
Example #3
0
void TNetInfBs::GenNoisyCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed,
					 	 	    const double& std_waiting_time, const double& std_beta,
					 	 	    const double& PercRndNodes, const double& PercRndRemoval) {
	TIntPrIntH EdgesUsedC; // list of used edges for a single cascade
	GenCascade(C, TModel, window, EdgesUsedC, delta, std_waiting_time, std_beta);

	// store keys
	TIntV KeyV;
	C.NIdHitH.GetKeyV(KeyV);

	// store first and last time
	double tbeg = TFlt::Mx, tend = TFlt::Mn;
	for (int i=0; i < KeyV.Len(); i++) {
		if (tbeg > C.NIdHitH.GetDat(KeyV[i]).Tm) tbeg = C.NIdHitH.GetDat(KeyV[i]).Tm;
		if (tend < C.NIdHitH.GetDat(KeyV[i]).Tm) tend = C.NIdHitH.GetDat(KeyV[i]).Tm;
	}

	// remove PercRndRemoval% of the nodes of the cascades
	if (PercRndRemoval > 0) {
		for (int i=KeyV.Len()-1; i >= 0; i--) {
			if (TFlt::Rnd.GetUniDev() < PercRndRemoval) {
				// remove from the EdgesUsedC the ones affected by the removal
				TIntPrV EdgesToRemove;
				for (TIntPrIntH::TIter EI = EdgesUsedC.BegI(); EI < EdgesUsedC.EndI(); EI++) {
					if ( (KeyV[i]==EI.GetKey().Val1 && C.IsNode(EI.GetKey().Val2) && C.GetTm(KeyV[i]) < C.GetTm(EI.GetKey().Val2)) ||
							(KeyV[i]==EI.GetKey().Val2 && C.IsNode(EI.GetKey().Val1) && C.GetTm(KeyV[i]) > C.GetTm(EI.GetKey().Val1)) ) {
						EI.GetDat() = EI.GetDat()-1;

						if (EI.GetDat()==0)
							EdgesToRemove.Add(EI.GetKey());
					}
				}

				for (int er=0; er<EdgesToRemove.Len(); er++)
					EdgesUsedC.DelKey(EdgesToRemove[er]);

				C.Del(KeyV[i]);
			}
		}

		// defrag the hash table, otherwise other functions can crash
		C.NIdHitH.Defrag();
	}

	// Substitute PercRndNodes% of the nodes for a random node at a random time
	if (PercRndNodes > 0) {
		for (int i=KeyV.Len()-1; i >= 0; i--) {
			if (TFlt::Rnd.GetUniDev() < PercRndNodes) {
				// remove from the EdgesUsedC the ones affected by the change
				TIntPrV EdgesToRemove;
				for (TIntPrIntH::TIter EI = EdgesUsedC.BegI(); EI < EdgesUsedC.EndI(); EI++) {
					if ( (KeyV[i]==EI.GetKey().Val1 && C.IsNode(EI.GetKey().Val2) && C.GetTm(KeyV[i]) < C.GetTm(EI.GetKey().Val2)) ||
							(KeyV[i]==EI.GetKey().Val2 && C.IsNode(EI.GetKey().Val1) && C.GetTm(KeyV[i]) > C.GetTm(EI.GetKey().Val1)) ) {
						EI.GetDat() = EI.GetDat()-1;

						if (EI.GetDat()==0)
							EdgesToRemove.Add(EI.GetKey());
					}
				}

				for (int er=0; er<EdgesToRemove.Len(); er++)
					EdgesUsedC.DelKey(EdgesToRemove[er]);

				printf("Old node n:%d t:%f --", KeyV[i].Val, C.GetTm(KeyV[i]));
				C.Del(KeyV[i]);

				// not repeating a label
				double tnew = 0;
				int keynew = -1;
				do {
					tnew = tbeg + TFlt::Rnd.GetUniDev()*(tend-tbeg);
					keynew = Graph->GetRndNId();
				} while (KeyV.IsIn(keynew));

				printf("New node n:%d t:%f\n", keynew, tnew);

				C.Add(keynew, tnew);
				KeyV.Add(keynew);
			}
		}
	}

	// add to the aggregate list (EdgesUsed)
	EdgesUsedC.Defrag();

	for (int i=0; i<EdgesUsedC.Len(); i++) {
		if (!EdgesUsed.IsKey(EdgesUsedC.GetKey(i))) EdgesUsed.AddDat(EdgesUsedC.GetKey(i)) = 0;

		EdgesUsed.GetDat(EdgesUsedC.GetKey(i)) += 1;
	}
}
Example #4
0
// Loads a comma-separated file into a bag-of-words document base.
//
// The first row of the file is a header that gives, per column, the name of
// the feature-generator type to use. The column at index ClassId is the class
// column and must read "NOM" or "MULTI-NOM"; columns listed in IgnoreIdV are
// skipped; every other column must match the GetType() string of one of the
// generators handled below.
//
// The file is read twice:
//   1. the data rows are fed to the feature generators (Update/UpdateCls);
//      this pass stops as soon as TrainLen rows have been read, so a TrainLen
//      that the row counter never reaches (e.g. a negative value) reads the
//      whole file;
//   2. the file is reopened and every data row is added as a document named
//      "Line-<row number>".
// Finally all document ids are fetched; a copy truncated with
// Trunc(TrainLen) becomes the training set and the remaining ids become the
// test set.
//
// Parameters:
//   FNm        path of the CSV file.
//   ClassId    zero-based index of the class column.
//   IgnoreIdV  zero-based indices of columns to ignore.
//   TrainLen   number of leading rows used for training (see above).
// Throws (via TExcept::Throw / EAssertR) on an unknown column type, on a row
// whose field count differs from the header, and on errors raised by the
// feature generators while processing a row.
PBowDocBs TFtrGenBs::LoadCsv(TStr& FNm, const int& ClassId, 
        const TIntV& IgnoreIdV, const int& TrainLen) {

    // feature generators
    PFtrGenBs FtrGenBs = TFtrGenBs::New();
    // CSV parsing stuff
    PSIn SIn = TFIn::New(FNm); 
    char SsCh = ' '; TStrV FldValV;
    // read the headers and initialise the feature generators
    TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);  
    for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
        const TStr& FldVal = FldValV[FldValN];
        if (FldValN == ClassId) { 
            if (FldVal == "NOM") {
                FtrGenBs->PutClsFtrGen(TFtrGenNominal::New());
            } else if (FldVal == "MULTI-NOM") {
                FtrGenBs->PutClsFtrGen(TFtrGenMultiNom::New());
            } else {
                TExcept::Throw("Wrong class type '" + FldVal + "', should be NOM or MULTI-NOM!");
            }
        } else if (!IgnoreIdV.IsIn(FldValN)) {
            if (FldVal == TFtrGenNumeric::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenNumeric::New());
            } else if (FldVal == TFtrGenNominal::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenNominal::New());
            } else if (FldVal == TFtrGenToken::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenToken::New(
                    TSwSet::New(swstNone), TStemmer::New(stmtNone)));
            } else if (FldVal == TFtrGenSparseNumeric::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenSparseNumeric::New());
            } else if (FldVal == TFtrGenMultiNom::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenMultiNom::New());
            } else {
                TExcept::Throw("Wrong type '" + FldVal + "'!");
            }
        }
    }
    // every data row must have as many fields as the header
    const int Flds = FldValV.Len();
    // read the lines and feed them to the feature generators
    int Recs = 0;
    while (!SIn->Eof()) {
        if (Recs == TrainLen) { break; }
        Recs++;
        printf("%7d\r", Recs);
        TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
        // make sure line still has the same number of fields as the header
        // (row Recs sits on file line Recs + 1 because of the header row)
        EAssertR(FldValV.Len() == Flds, 
            TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
            Recs + 1, FldValV.Len(), Flds));
        // go over lines
        try {
            TStrV FtrValV;
            for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
                const TStr& FldVal = FldValV[FldValN];
                if (FldValN == ClassId) { 
                    FtrGenBs->UpdateCls(FldVal);
                } else if (!IgnoreIdV.IsIn(FldValN)) {
                    FtrValV.Add(FldVal);
                }
            }
            FtrGenBs->Update(FtrValV);
        } catch (PExcept Ex) {
            // re-throw with the offending line number attached
            TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!", 
                Recs+1, Ex->GetMsgStr().CStr()));
        }
    }
    // read the file again and feed it to the training set
    PBowDocBs BowDocBs = FtrGenBs->MakeBowDocBs();
    // we read and ignore the headers since we parsed them already 
    SIn = TFIn::New(FNm); SsCh = ' ';
    TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);  
    // read the lines and feed them to the training set
    Recs = 0;
    while (!SIn->Eof()){
        Recs++;
        printf("%7d\r", Recs);
        TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
        // make sure line still has the same number of fields as the header;
        // the line number is an int, so it must be formatted with %d
        EAssertR(FldValV.Len() == Flds, 
            TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
            Recs + 1, FldValV.Len(), Flds));
        // go over lines and construct the sparse vector
        TStrV FtrValV; TStr ClsFtrVal;
        try {
            for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
                const TStr& FldVal = FldValV[FldValN];
                if (FldValN == ClassId) { 
                    ClsFtrVal = FldVal;
                } else if (!IgnoreIdV.IsIn(FldValN)) {
                    FtrValV.Add(FldVal);
                }
            }
        } catch (PExcept Ex) {
            TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!", 
                Recs+1, Ex->GetMsgStr().CStr()));
        }
        // add the feature vector to trainsets
        FtrGenBs->AddBowDoc(BowDocBs, TStr::Fmt("Line-%d", Recs), FtrValV, ClsFtrVal);
    }
    // prepare training and testing doc ids
    TIntV AllDIdV; BowDocBs->GetAllDIdV(AllDIdV); IAssert(AllDIdV.IsSorted());
    TIntV TrainDIdV = AllDIdV; TrainDIdV.Trunc(TrainLen);
    BowDocBs->PutTrainDIdV(TrainDIdV);
    // test set = all document ids that are not in the training set
    TIntV TestDIdV = AllDIdV; TestDIdV.Minus(TrainDIdV);
    BowDocBs->PutTestDIdV(TestDIdV);

    return BowDocBs;
}