Exemple #1
0
/// Returns the amount the flow can be augmented over the paths, 0 if no path can be found. ##TSnap::FindAugV
int FindAugV (const PNEANet &Net, const int& CapIndex, TIntV &Flow, TIntQ &FwdNodeQ, TIntH &PredEdgeH, TIntQ &BwdNodeQ, TIntH &SuccEdgeH, TIntV &MidToSrcAugV, TIntV &MidToSnkAugV, const int& SrcNId, const int& SnkNId) {
  int MidPtNId = IntFlowBiDBFS(Net, CapIndex, Flow, FwdNodeQ, PredEdgeH, BwdNodeQ, SuccEdgeH, SrcNId, SnkNId);
  if (MidPtNId == -1) { return 0; }
  int MinAug = TInt::Mx, NId = MidPtNId, AugFlow = 0;
  // Build the path from the midpoint back to the source by tracing through the PredEdgeH
  for (int EId = PredEdgeH.GetDat(NId); NId != SrcNId; EId = PredEdgeH.GetDat(NId)) {
    MidToSrcAugV.Add(EId);
    const TNEANet::TEdgeI &EI = Net->GetEI(EId);
    if (EI.GetSrcNId() == NId) {
      NId = EI.GetDstNId();
      AugFlow = Flow[EId];
    } else {
      NId = EI.GetSrcNId();
      AugFlow = Net->GetIntAttrIndDatE(EId, CapIndex) - Flow[EId];
    }
    if (AugFlow < MinAug) { MinAug = AugFlow; }
  }
  NId = MidPtNId;
  // Build the path from the midpoint back to the sink by tracing through the SuccEdgeH
  for (int EId = SuccEdgeH.GetDat(NId); NId != SnkNId; EId = SuccEdgeH.GetDat(NId)) {
    MidToSnkAugV.Add(EId);
    const TNEANet::TEdgeI &EI = Net->GetEI(EId);
    if (EI.GetDstNId() == NId) {
      NId = EI.GetSrcNId();
      AugFlow = Flow[EId];
    } else {
      NId = EI.GetDstNId();
      AugFlow = Net->GetIntAttrIndDatE(EId, CapIndex) - Flow[EId];
    }
    if (AugFlow < MinAug) { MinAug = AugFlow; }
  }
  return MinAug;
}
Exemple #2
0
TEST(SHMTest, LoadTables) {
  TStr Filename("test.graph");

  TTableContext Context;
  // Create schema.
  Schema GradeS;
  GradeS.Add(TPair<TStr,TAttrType>("A", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("B", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Quarter", atStr));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2011", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2012", atInt));
  GradeS.Add(TPair<TStr,TAttrType>("Grade 2013", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2);
  RelevantCols.Add(3); RelevantCols.Add(4); RelevantCols.Add(5);
  PTable p1 = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols);
  TFOut OutStream(Filename);
  p1->Save(OutStream);

  TShMIn Shmin(Filename);
  PTable p2 = TTable::LoadShM(Shmin, &Context);

  EXPECT_EQ(p1->GetNumRows().Val, p2->GetNumRows().Val);
  EXPECT_EQ(p1->GetNumValidRows().Val, p2->GetNumValidRows().Val); 
  EXPECT_EQ(p1->GetIntVal("Grade 2011", 0).Val, p2->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(p1->GetIntVal("Grade 2013", 4).Val, p2->GetIntVal("Grade 2013", 4).Val);
}
Exemple #3
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  // create table
  PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);
  //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt");
  T->Unique("Animal");
  TTable Ts = *T;  // did we fix problem with copy-c'tor ?
  //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols);
  //Ts->Unique(AnimalUnique);

  // test Select
  // create predicate tree: find all animals that are big and african or medium and Australian
  TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa");  
  TPredicate::TPredicateNode N1(A1);  // Location == "Africa"
  TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big");  
  TPredicate::TPredicateNode N2(A2);  // Size == "big"
  TPredicate::TPredicateNode N3(AND);
  N3.AddLeftChild(&N1);
  N3.AddRightChild(&N2);
  TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia");  
  TPredicate::TPredicateNode N4(A4);  
  TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium");  
  TPredicate::TPredicateNode N5(A5); 
  TPredicate::TPredicateNode N6(AND);
  N6.AddLeftChild(&N4);
  N6.AddRightChild(&N5);
  TPredicate::TPredicateNode N7(OR);
  N7.AddLeftChild(&N3);
  N7.AddRightChild(&N6);
  TPredicate Pred(&N7);
  TIntV SelectedRows;
  Ts.Select(Pred, SelectedRows);

  TStrV GroupBy;
  GroupBy.Add("Location");
  T->Group(GroupBy, "LocationGroup");
  GroupBy.Add("Size");
  T->Group(GroupBy, "LocationSizeGroup");
  T->Count("LocationCount", "Location");
  PTable Tj = T->Join("Location", Ts, "Location");
  TStrV UniqueAnimals;
  UniqueAnimals.Add("Animals_1.Animal");
  UniqueAnimals.Add("Animals_2.Animal");
  Tj->Unique(UniqueAnimals, false);
  //print table
   T->SaveSS("tests/animals_out_T.txt");
   Ts.SaveSS("tests/animals_out_Ts.txt");
   Tj->SaveSS("tests/animals_out_Tj.txt");
  return 0;
}
Exemple #4
0
// Test subgraphs
void TestSubTNEGraphs() {
  PNEGraph Graph;
  PNEGraph Graph1;
  PNEGraph Graph2;
  PNEGraph Graph3;
  int i;
  TIntV NIdV;
  TIntV NIdV1;
  TIntV EIdV;

  Graph = GetTestTNEGraph();
  PrintGraph("TNEGraph", Graph);

  for (i = 10; i < 15; i++) {
    NIdV.Add(i);
  }

  Graph1 = TSnap::GetSubGraph(Graph, NIdV);
  PrintGraph("TNEGraph1", Graph1);

  for (i = 0; i < 20; i += 2) {
    NIdV1.Add(i);
  }

  Graph2 = TSnap::GetSubGraph(Graph, NIdV1);
  PrintGraph("TNEGraph2", Graph2);

  for (i = 0; i < 120; i += 2) {
    EIdV.Add(i);
  }

  Graph3 = TSnap::GetESubGraph(Graph, EIdV);
  PrintGraph("TNEGraph3", Graph3);
}
void TUStr::GetWordUStrV(TUStrV& WordUStrV){
  // clear word vector
  WordUStrV.Clr();
  // create boundaries
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds
  int UniChs=Len(); TIntV WordUniChV;
  for (int UniChN=0; UniChN<=UniChs; UniChN++){
    if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
      if (UniChN<UniChs){ // if not finish
        // if last-word-char or single-alphabetic-char
        if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
          WordUniChV.Add(UniChV[UniChN]); // add char
        }
      }
      if (!WordUniChV.Empty()){ // add current word to vector
        TUStr WordUStr(WordUniChV); // construct word from char-vector
        WordUStrV.Add(WordUStr); // add word to word-vector
        WordUniChV.Clr(false); // clear char-vector
      }
    } else {
      // add character to char-vector
      WordUniChV.Add(UniChV[UniChN]);
    }
  }
}
Exemple #6
0
TEST(TQQueueTest, Unlimited2) {
	try {
		TQQueue<TInt> Q(64, -1);
		ASSERT_TRUE(Q.Empty());
		TIntV Vec;
		Vec.Add(1);
		Vec.Add(2);
		Vec.Add(3);

		Q.PushV(Vec);
		ASSERT_EQ(Q.Front(), 1);
		ASSERT_EQ(Q.Back(), 3);
		ASSERT_EQ(Q.Len(), 3);
		TIntV Vec2; Q.GetSubValVec(0, 2, Vec2);
		ASSERT_EQ(Vec.Len(), Vec2.Len());
		for (int i = 0; i < 3; i++) {
			ASSERT_EQ(Vec[i], Vec2[i]);
			ASSERT_EQ(Q[i], Vec[i]);
		}

	} catch (PExcept& Except) {
		printf("Error: %s", Except->GetStr());
		throw Except;
	}
}
Exemple #7
0
int main(int argc, char* argv[]){
  //test1();
  TTableContext Context;

  // create scheme
  Schema PostS;
  PostS.Add(TPair<TStr,TAttrType>("Id", atInt));
  PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr));
  PostS.Add(TPair<TStr,TAttrType>("Score", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4);

  PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("OwnerUserId");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);
  P->Aggregate(cols, aaSum, "Score", "Sum");
  gettimeofday(&end, NULL);

  double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);
  
  if (atoi(argv[1]) == 0) return 0;

  P->SaveSS("tests/p3.txt");

  return 0;
}
Exemple #8
0
// Test edge subgraph conversion
TEST(subgraph, TestConvertESubGraphs) {
  PNEGraph NEGraph;
  PNGraph NGraph;
  TIntV NIdV;
  TIntV EIdV;
  int i;

  NGraph = GetTestTNGraph();
  EXPECT_EQ(20,NGraph->GetNodes());
  EXPECT_EQ(60,NGraph->GetEdges());

  for (i = 0; i < 20; i += 2) {
    NIdV.Add(i);
  }

  // TODO: fix TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true), it fails
  // UNGraph = TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true);
  NEGraph = TSnap::ConvertGraph<PNEGraph>(NGraph);
  EXPECT_EQ(20,NEGraph->GetNodes());
  EXPECT_EQ(60,NEGraph->GetEdges());

  // select every second edge
  i = 0;
  for (TNEGraph::TEdgeI EI = NEGraph->BegEI(); EI < NEGraph->EndEI(); EI++) {
    if (i == 0) {
      EIdV.Add(EI.GetId());
    }
    i = (i + 1) % 2;
  }

  NGraph = TSnap::ConvertESubGraph<PNGraph>(NEGraph, EIdV);
  EXPECT_EQ(20,NGraph->GetNodes());
  EXPECT_EQ(30,NGraph->GetEdges());
}
Exemple #9
0
void TBPGraph::GetNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetNodes(), 0);
  for (int N=LeftH.FFirstKeyId(); LeftH.FNextKeyId(N); ) {
    NIdV.Add(LeftH.GetKey(N)); }
  for (int N=RightH.FFirstKeyId(); RightH.FNextKeyId(N); ) {
    NIdV.Add(RightH.GetKey(N)); }
}
Exemple #10
0
void TempMotifCounter::GetAllStaticTriangles(TIntV& Us, TIntV& Vs, TIntV& Ws) {
  Us.Clr();
  Vs.Clr();
  Ws.Clr();
  // Get degree ordering of the graph
  int max_nodes = static_graph_->GetMxNId();
  TVec<TIntPair> degrees(max_nodes);
  degrees.PutAll(TIntPair(0, 0));
  // Set the degree of a node to be the number of nodes adjacent to the node in
  // the undirected graph.
  TIntV nodes;
  GetAllNodes(nodes);
  #pragma omp parallel for schedule(dynamic)  
  for (int node_id = 0; node_id < nodes.Len(); node_id++) {
    int src = nodes[node_id];
    TIntV nbrs;
    GetAllNeighbors(src, nbrs);
    degrees[src] = TIntPair(nbrs.Len(), src);
  }
  degrees.Sort();
  TIntV order = TIntV(max_nodes);
  #pragma omp parallel for schedule(dynamic)  
  for (int i = 0; i < order.Len(); i++) {
    order[degrees[i].Dat] = i;
  }

  // Get triangles centered at a given node where that node is the smallest in
  // the degree ordering.
  #pragma omp parallel for schedule(dynamic)  
  for (int node_id = 0; node_id < nodes.Len(); node_id++) {
    int src = nodes[node_id];
    int src_pos = order[src];
    
    // Get all neighbors who come later in the ordering
    TIntV nbrs;
    GetAllNeighbors(src, nbrs);    
    TIntV neighbors_higher;
    for (int i = 0; i < nbrs.Len(); i++) {
      int nbr = nbrs[i];
      if (order[nbr] > src_pos) { neighbors_higher.Add(nbr); }
    }

    for (int ind1 = 0; ind1 < neighbors_higher.Len(); ind1++) {
      for (int ind2 = ind1 + 1; ind2 < neighbors_higher.Len(); ind2++) {
        int dst1 = neighbors_higher[ind1];
        int dst2 = neighbors_higher[ind2];
        // Check for triangle formation
        if (static_graph_->IsEdge(dst1, dst2) || static_graph_->IsEdge(dst2, dst1)) {
          #pragma omp critical
          {
            Us.Add(src);
            Vs.Add(dst1);
            Ws.Add(dst2);
          }
        }
      }
    }
  }
}
Exemple #11
0
// burn each link independently (forward with FwdBurnProb, backward with BckBurnProb)
void TForestFire::BurnExpFire() {
  const double OldFwdBurnProb = FwdBurnProb;
  const double OldBckBurnProb = BckBurnProb;
  const int NInfect = InfectNIdV.Len();
  const TNGraph& G = *Graph;
  TIntH BurnedNIdH;               // burned nodes
  TIntV BurningNIdV = InfectNIdV; // currently burning nodes
  TIntV NewBurnedNIdV;            // nodes newly burned in current step
  bool HasAliveNbrs;              // has unburned neighbors
  int NBurned = NInfect, NDiedFire=0;
  for (int i = 0; i < InfectNIdV.Len(); i++) {
    BurnedNIdH.AddDat(InfectNIdV[i]); }
  NBurnedTmV.Clr(false);  NBurningTmV.Clr(false);  NewBurnedTmV.Clr(false);
  for (int time = 0; ; time++) {
    NewBurnedNIdV.Clr(false);
    // for each burning node
    for (int node = 0; node < BurningNIdV.Len(); node++) {
      const int& BurningNId = BurningNIdV[node];
      const TNGraph::TNodeI Node = G.GetNI(BurningNId);
      HasAliveNbrs = false;
      NDiedFire = 0;
      // burn forward links  (out-links)
      for (int e = 0; e < Node.GetOutDeg(); e++) {
        const int OutNId = Node.GetOutNId(e);
        if (! BurnedNIdH.IsKey(OutNId)) { // not yet burned
          HasAliveNbrs = true;
          if (Rnd.GetUniDev() < FwdBurnProb) {
            BurnedNIdH.AddDat(OutNId);  NewBurnedNIdV.Add(OutNId);  NBurned++; }
        }
      }
      // burn backward links (in-links)
      if (BckBurnProb > 0.0) {
        for (int e = 0; e < Node.GetInDeg(); e++) {
          const int InNId = Node.GetInNId(e);
          if (! BurnedNIdH.IsKey(InNId)) { // not yet burned
            HasAliveNbrs = true;
            if (Rnd.GetUniDev() < BckBurnProb) {
              BurnedNIdH.AddDat(InNId);  NewBurnedNIdV.Add(InNId);  NBurned++; }
          }
        }
      }
      if (! HasAliveNbrs) { NDiedFire++; }
    }
    NBurnedTmV.Add(NBurned);
    NBurningTmV.Add(BurningNIdV.Len() - NDiedFire);
    NewBurnedTmV.Add(NewBurnedNIdV.Len());
    //BurningNIdV.AddV(NewBurnedNIdV);   // node is burning eternally
    BurningNIdV.Swap(NewBurnedNIdV);    // node is burning just 1 time step
    if (BurningNIdV.Empty()) break;
    FwdBurnProb = FwdBurnProb * ProbDecay;
    BckBurnProb = BckBurnProb * ProbDecay;
  }
  BurnedNIdV.Gen(BurnedNIdH.Len(), 0);
  for (int i = 0; i < BurnedNIdH.Len(); i++) {
    BurnedNIdV.Add(BurnedNIdH.GetKey(i)); }
  FwdBurnProb = OldFwdBurnProb;
  BckBurnProb = OldBckBurnProb;
}
Exemple #12
0
void TempMotifCounter::GetAllNeighbors(int node, TIntV& nbrs) {
  nbrs = TIntV();
  TNGraph::TNodeI NI = static_graph_->GetNI(node);
  for (int i = 0; i < NI.GetOutDeg(); i++) { nbrs.Add(NI.GetOutNId(i)); }
  for (int i = 0; i < NI.GetInDeg(); i++) {
    int nbr = NI.GetInNId(i);
    if (!NI.IsOutNId(nbr)) { nbrs.Add(nbr); }
  }
}
void TMultimodalGraphImplB::TNodeI::GetAdjacentModes(TIntV &AdjacentModes) const {
  int ModeId = NodeToModeMapping->GetDat(GetId());
  for (TGraphs::TIter it = Graphs->BegI(); it < Graphs->EndI(); it++) {
    if (it.GetKey().GetVal1() == ModeId) {
      AdjacentModes.Add(it.GetKey().GetVal2());
    } else if (it.GetKey().GetVal2() == ModeId) {
      AdjacentModes.Add(it.GetKey().GetVal1());
    }
  }
}
Exemple #14
0
void TTable::Defrag() {
  TInt FreeIndex = 0;
  TIntV Mapping;  // Mapping[old_index] = new_index/invalid
  for (TInt i = 0; i < Next.Len(); i++) {
    if (Next[i] != Invalid) {  
      // "first row" properly set beforehand
      if (FreeIndex == 0) {
        Assert (i == FirstValidRow);
        FirstValidRow = 0;
      }
 
      if (Next[i] != Last) { 
        Next[FreeIndex] = FreeIndex + 1;
        Mapping.Add(FreeIndex);
      } else {
        Next[FreeIndex] = Last;
        Mapping.Add(Last);
      }

      for (TInt j = 0; j < IntCols.Len(); j++) {
        IntCols[j][FreeIndex] = IntCols[j][i];
      }
      for (TInt j = 0; j < FltCols.Len(); j++) {
        FltCols[j][FreeIndex] = FltCols[j][i];
      }
      for (TInt j = 0; j < StrColMaps.Len(); j++) {
        StrColMaps[j][FreeIndex] = StrColMaps[j][i];
      }

      FreeIndex++;
    } else {
      NumRows--;
      Mapping.Add(Invalid);
    }
  }

  for(THash<TStr,THash<TInt,TIntV> >::TIter it = GroupMapping.BegI(); it < GroupMapping.EndI(); it++){
    THash<TInt,TIntV>& G = it->Dat;
    for(THash<TInt,TIntV>::TIter iit = G.BegI(); iit < G.EndI(); iit++) {
      TIntV& Group = iit->Dat;
      TInt FreeIndex = 0;
      for (TInt j=0; j < Group.Len(); j++) {
        if (Mapping[Group[j]] != Invalid) {
          Group[FreeIndex] = Mapping[Group[j]];
          FreeIndex++;
        }
      }
      // resize to get rid of end values
      Group.Trunc(FreeIndex);
    }
  }
  // should match, or bug somewhere
  Assert (NumValidRows == NumRows);
}
Exemple #15
0
/// estimate number of communities using cross validation
int TAGMFast::FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const double StepAlpha, const double StepBeta) {
    double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
    TIntV ComsV;
    ComsV.Add(MinComs);
    while (ComsV.Len() < DivComs) {
      int NewComs = int(ComsV.Last() * ComsGap);
      if (NewComs == ComsV.Last().Val) { NewComs++; }
      ComsV.Add(NewComs);
    }
    if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
    return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm + ".CV.likelihood", StepAlpha, StepBeta);
}
Exemple #16
0
void TStrUtil::GetWIdV(const TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV) {
  const int NotWId = -1;
  TChA ChA(CStr);
  TVec<char *> WrdV;
  TInt WId;
  TStrUtil::SplitWords(ChA, WrdV);
  WIdV.Clr(false);
  for (int w = 0; w < WrdV.Len(); w++) {
    if (StrH.IsKeyGetDat(WrdV[w], WId)) { WIdV.Add(WId); }
    else { WIdV.Add(NotWId); }
  }
}
Exemple #17
0
// YES I COPIED AND PASTED CODE my section leader would be so ashamed :D
void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds,
    TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) {
  TRnd RandomGenerator; // TODO: make this "more random" by incorporating time
  for (int i = 0; i < NumBands; ++i) {
    THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand)
    THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs)
    for (int j = 0; j < BandSize; ++j) {
      // Create new signature
      TVec < TMd5Sig > Signature;
      ShingleToQuoteIds.GetKeyV(Signature);
      Signature.Shuffle(RandomGenerator);

      // Place in bucket - not very efficient
      int SigLen = Signature.Len();
      for (int k = 0; k < SigLen; ++k) {
        TIntSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]);
        for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) {
          TInt Key = l.GetKey();
          if (Inverted.IsKey(Key)) {
            TIntV CurSignature = Inverted.GetDat(Key);
            if (CurSignature.Len() <= j) {
              CurSignature.Add(k);
              Inverted.AddDat(Key, CurSignature);
            }
          } else {
            TIntV NewSignature;
            NewSignature.Add(k);
            Inverted.AddDat(Key, NewSignature);
          }
        }
      }
    }

    TIntV InvertedKeys;
    Inverted.GetKeyV(InvertedKeys);
    TInt InvertedLen = InvertedKeys.Len();
    for (int k = 0; k < InvertedLen; ++k) {
      TIntSet Bucket;
      TIntV Signature = Inverted.GetDat(InvertedKeys[k]);
      if (BandBuckets.IsKey(Signature)) {
        Bucket = BandBuckets.GetDat(Signature);
      }
      Bucket.AddKey(InvertedKeys[k]);
      BandBuckets.AddDat(Signature, Bucket);
    }

    SignatureBandBuckets.Add(BandBuckets);
    Err("%d out of %d band signatures computed\n", i + 1, NumBands);
  }
  Err("Minhash step complete!\n");
}
Exemple #18
0
// improved version
void GetMergeSortedV1(TIntV& NeighbourV, TNGraph::TNodeI NI) {
  int j = 0;
  int k = 0;
  int prev = -1;
  int indeg = NI.GetInDeg();
  int outdeg = NI.GetOutDeg();
  //while (j < NI.GetInDeg() && k < NI.GetOutDeg()) {
  if (indeg > 0  &&  outdeg > 0) {
    int v1 = NI.GetInNId(j);
    int v2 = NI.GetOutNId(k);
    while (1) {
      if (v1 <= v2) {
        if (prev != v1) {
          NeighbourV.Add(v1);
          prev = v1;
        }
        j += 1;
        if (j >= indeg) {
          break;
        }
        v1 = NI.GetInNId(j);
      } else {
        if (prev != v2) {
          NeighbourV.Add(v2);
          prev = v2;
        }
        k += 1;
        if (k >= outdeg) {
          break;
        }
        v2 = NI.GetOutNId(k);
      }
    }
  }
  while (j < indeg) {
    int v = NI.GetInNId(j);
    if (prev != v) {
      NeighbourV.Add(v);
      prev = v;
    }
    j += 1;
  }
  while (k < outdeg) {
    int v = NI.GetOutNId(k);
    if (prev != v) {
      NeighbourV.Add(v);
      prev = v;
    }
    k += 1;
  }
}
Exemple #19
0
/////////////////////////////////////////////////
// Trawling the web for emerging communities
// graph, left points to right
TTrawling::TTrawling(const PNGraph& Graph, const int& MinSupport) : MinSup(MinSupport) {
  TIntH ItemCntH;
  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    IAssert(NI.GetOutDeg()==0 || NI.GetInDeg()==0); // edges only point from left to right
    if (NI.GetOutDeg()==0) { continue; }
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      ItemCntH.AddDat(NI.GetOutNId(e)) += 1;
    }
  }

  TIntV RightV;
  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    IAssert(NI.GetOutDeg()==0 || NI.GetInDeg()==0); // edges only point from left to right
    if (NI.GetOutDeg()==0) { continue; }
    RightV.Clr(false);
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      const int itm = NI.GetOutNId(e);
      // only include items that already are above minimum support
      if (ItemCntH.GetDat(itm) >= MinSup) {
        RightV.Add(itm); }
    }
    if (! RightV.Empty()) {
      NIdSetH.AddDat(NI.GetId(), RightV);
    }
  }
  //
  for (int n = 0; n < NIdSetH.Len(); n++) {
    const TIntV& Set = NIdSetH[n];
    for (int s = 0; s < Set.Len(); s++) {
      SetNIdH.AddDat(Set[s]).Add(n);
    }
  }
}
void TVizMapContext::GetSelectDIdV(TIntV& DIdV) {
    DIdV.Gen(SelPointV.Len(), 0);
    const int SelPoints = SelPointV.Len();
    for (int SelPointN = 0; SelPointN < SelPoints; SelPointN++) {
        DIdV.Add(VizMapFrame->GetPoint(SelPointV[SelPointN])->GetDocId());
    }
}
Exemple #21
0
void TempMotifCounter::GetAllNodes(TIntV& nodes) {
  nodes = TIntV();
  for (TNGraph::TNodeI it = static_graph_->BegNI();
       it < static_graph_->EndNI(); it++) {
    nodes.Add(it.GetId());
  }
}
Exemple #22
0
// Test subgraphs
void TestSubTUNGraphs() {
  PUNGraph Graph;
  PUNGraph Graph1;
  PUNGraph Graph2;
  PUNGraph Graph3;
  int i;
  TIntV NIdV;
  TIntV NIdV1;

  Graph = GetTestTUNGraph();
  PrintGraph("TUNGraph", Graph);

  for (i = 10; i < 15; i++) {
    NIdV.Add(i);
  }

  Graph1 = TSnap::GetSubGraph(Graph, NIdV);
  PrintGraph("TUNGraph1", Graph1);

  Graph2 = TSnap::GetSubGraph(Graph, NIdV, true);
  PrintGraph("TUNGraph2", Graph2);

  for (i = 0; i < 20; i += 2) {
    NIdV1.Add(i);
  }

  Graph3 = TSnap::GetSubGraph(Graph, NIdV1, true);
  PrintGraph("TUNGraph3", Graph3);
}
void TNmObjBs::GetNmObjDIdV(
 const PBowDocBs& BowDocBs, TIntV& BowDIdV, 
 const TStr& NmObjStr1, const TStr& NmObjStr2) const {
  // get first named-object-id
  int NmObjId1=GetNmObjId(NmObjStr1);
  TIntV NmObjDocIdV1; GetNmObjDocIdV(NmObjId1, NmObjDocIdV1);
  NmObjDocIdV1.Sort();
  // get second named-object-id
  TIntV NmObjDocIdV2;
  if (!NmObjStr2.Empty()){
    int NmObjId2=GetNmObjId(NmObjStr2);
    GetNmObjDocIdV(NmObjId2, NmObjDocIdV2);
    NmObjDocIdV2.Sort();
  }
  // create joint doc-id-vector
  TIntV NmObjDocIdV;
  if (NmObjDocIdV2.Empty()){
    NmObjDocIdV=NmObjDocIdV1;
  } else {
    NmObjDocIdV1.Intrs(NmObjDocIdV2, NmObjDocIdV);
  }
  // traverse named-object-documents to collect bow-document-ids
  BowDIdV.Gen(NmObjDocIdV.Len(), 0);
  for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocIdV.Len(); NmObjDocIdN++){
    TStr DocNm=GetDocNm(NmObjDocIdV[NmObjDocIdN]);
    int DId=BowDocBs->GetDId(DocNm);
    if (DId!=-1){
      BowDIdV.Add(DId);
    } 
  }
}
Exemple #24
0
// Test node subgraph conversion
void TestConvertSubGraphs() {
  PNGraph NGraph;
  PUNGraph UNGraph;
  int N1, N2, N3;
  int E1, E2, E3;
  TIntV NIdV;
  int i;

  NGraph = GetTestTNGraph();
  N1 = NGraph->GetNodes();
  E1 = NGraph->GetEdges();

  for (i = 0; i < 20; i += 2) {
    NIdV.Add(i);
  }

  // TODO: fix TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true), it fails
  // UNGraph = TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true);
  UNGraph = TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV);
  N2 = UNGraph->GetNodes();
  E2 = UNGraph->GetEdges();

  NGraph = TSnap::ConvertSubGraph<PNGraph>(UNGraph, NIdV);
  N3 = NGraph->GetNodes();
  E3 = NGraph->GetEdges();

  printf("---- TestConvertSubGraphs -----\n");
  printf("nodes: %d,%d,%d,  edges: %d,%d,%d\n", N1, N2, N3, E1, E2, E3);
  printf("\n");
}
Exemple #25
0
// Test subgraphs
void TestEdgeSubNets() {
  TPt <TNodeEdgeNet<TInt, TInt> > Net;
  TPt <TNodeEdgeNet<TInt, TInt> > Net1;
  TPt <TNodeEdgeNet<TInt, TInt> > Net2;
  TPt <TNodeEdgeNet<TInt, TInt> > Net3;
  TPt <TNodeEdgeNet<TInt, TInt> > Net4;
  int i;
  TIntV NIdV;
  TIntV NIdV1;

  Net = GetTestTNodeEdgeNet();
  PrintNet("TestEdgeSubNets", Net);

  for (i = 10; i < 15; i++) {
    NIdV.Add(i);
  }

  Net1 = TSnap::GetSubGraph(Net, NIdV);
  PrintNet("TestEdgeSubNets1", Net1);

  for (i = 0; i < 20; i += 2) {
    NIdV1.Add(i);
  }

  Net2 = TSnap::GetSubGraph(Net, NIdV1);
  PrintNet("TestEdgeSubNets2", Net2);

  Net3 = TSnap::GetEDatSubGraph(Net, 1, 0);
  PrintNet("TestEdgeSubNets3", Net3);

  Net4 = TSnap::GetEDatSubGraph(Net, 2, -1);
  PrintNet("TestEdgeSubNets4", Net4);
}
Exemple #26
0
//////////////////////////////////////////////////////////////////////////
// String-To-Words
void TStrParser::DocStrToWIdV(const TStr& _DocStr, TIntV& WordIdV, const bool& Stemm) {
    TStr DocStr = _DocStr.GetUc();  // to upper case
    TStrV WordV; DocStr.SplitOnWs(WordV); int WordN = WordV.Len();
    WordIdV.Reserve(WordN, 0);

    PStemmer Stemmer = TStemmer::New(stmtPorter);
    TIntH WordsInDoc;
    for (int WordC = 0; WordC < WordN; WordC++) {
        TStr WordStr;
        if (Stemm) {
            WordStr = Stemmer->GetStem(WordV[WordC]);
        } else {
            WordStr = WordV[WordC];
        }
        int WId = GetWId(WordStr);
        if (WId == -1) {
            WId = WordToIdH.AddKey(WordStr);
            WordToIdH[WId] = 0;
        }
        WordIdV.Add(WId);
        
        // is it first time we see this word in this doc?
        if (!WordsInDoc.IsKey(WId)) WordsInDoc.AddKey(WId);
    }

    //do some statistics for DF
    DocsParsed++;
    for (int i = 0, l = WordsInDoc.Len(); i < l; i++)
        WordToIdH[WordsInDoc.GetKey(i)]++;

    Assert(WordV.Len() == WordIdV.Len());
}
Exemple #27
0
/// Shingles by words
void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase,
    TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen,
    THash<TMd5Sig, TIntV>& ShingleToClusterIds) {
  Err("Hashing shingles of clusters...\n");
  for (int i = 0; i < ClusterIds.Len(); i++) {
    if (i % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len());
    }
    TCluster C;
    ClusterBase->GetCluster(ClusterIds[i], C);
    //fprintf(stderr, "%d vs. %d\n", ClusterIds[i].Val, C.GetId().Val);

    // Put x-word shingles into hash table; x is specified by ShingleLen parameter
    THashSet < TMd5Sig > CHashedShingles;
    GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles);
    for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI();
        Hash < CHashedShingles.EndI(); Hash++) {
      TIntV ShingleClusterIds;
      if (ShingleToClusterIds.IsKey(*Hash)) {
        ShingleClusterIds = ShingleToClusterIds.GetDat(*Hash);
      }
      ShingleClusterIds.Add(ClusterIds[i]);
      ShingleToClusterIds.AddDat(*Hash, ShingleClusterIds);
    }
  }
  Err("Done hashing!\n");
}
Exemple #28
0
void TYInvIx::GetDocIdV(
 const PYWordDs& WordDs, const int& MnDocFq, TIntV& DocIdV){
  IAssert(MnDocFq>=0);
  if (MnDocFq==0){
    DocIdV=AllDocIdV;
  } else {
    TIntIntH DocIdFqH(100); int MxDocFq=0;
    int WordIdN=WordDs->FFirstWordId(); int WordId; double WordFq;
    while (WordDs->FNextWordId(WordIdN, WordId, WordFq)){
      if (WordIdToFirstDocIdNH.IsKey(WordId)){
        int DocIdN=FFirstDocId(WordId); int DocId;
        while (FNextWordId(DocIdN, DocId)){
          DocIdFqH.AddDat(DocId)+=int(WordFq);
          MxDocFq=TInt::GetMx(MxDocFq, DocIdFqH.GetDat(DocId));
        }
      }
    }
    int NewMnDocFq=(MnDocFq<=MxDocFq) ? MnDocFq : MxDocFq-3;
    DocIdV.Gen(DocIdFqH.Len(), 0);
    int DocIdP=DocIdFqH.FFirstKeyId();
    while (DocIdFqH.FNextKeyId(DocIdP)){
      int DocId=DocIdFqH.GetKey(DocIdP);
      int DocFq=DocIdFqH[DocIdP];
      if (DocFq>=NewMnDocFq){DocIdV.Add(DocId);}
    }
  }
}
Exemple #29
0
void TYFSelBs::GetBestWordIdV(
 const int& DocId, const double& EstExp, const double& SumEstPrb,
 const PYWordDs& IntrsWordDs, TIntV& BestWordIdV){
  TIntFltKdV& WordIdEstKdV=DocIdToWordIdEstVV[DocId];
  TFltIntKdV WordEstIdKdV(WordIdEstKdV.Len(), 0);
  double MnWordEst=TFlt::Mx;
  for (int WordIdN=0; WordIdN<WordIdEstKdV.Len(); WordIdN++){
    int WordId=WordIdEstKdV[WordIdN].Key;
    double WordEst=pow(WordIdEstKdV[WordIdN].Dat, EstExp);
    if (IntrsWordDs->IsWordId(WordId)){
      WordEstIdKdV.Add(TFltIntKd(WordEst, WordId));
      MnWordEst=TFlt::GetMn(WordEst, MnWordEst);
    }
  }
  double SumWordEst=0;
  {for (int WordIdN=0; WordIdN<WordEstIdKdV.Len(); WordIdN++){
    SumWordEst+=(WordEstIdKdV[WordIdN].Key-=MnWordEst);}}
  WordEstIdKdV.Sort(false);

  {BestWordIdV.Gen(WordEstIdKdV.Len(), 0);
  SumWordEst*=SumEstPrb; int WordIdN=0;
  while ((SumWordEst>=0)&&(WordIdN<WordEstIdKdV.Len())){
    double WordEst=WordEstIdKdV[WordIdN].Key;
    int WordId=WordEstIdKdV[WordIdN].Dat;
    SumWordEst-=WordEst;
    BestWordIdV.Add(WordId);
    WordIdN++;
  }}
}
Exemple #30
0
PJsonVal TGraphCascade::GetPosterior(const TStrV& NodeNmV, const TFltV& QuantileV) const {
    PJsonVal Result = TJsonVal::NewObj();
    TIntV NodeIdV;
    if (NodeNmV.Empty()) {
        // go over all zero timestamps for which samples exist
        TIntV FullNodeIdV; Graph.GetNIdV(FullNodeIdV);
        int Nodes = Graph.GetNodes();
        for (int NodeN = 0; NodeN < Nodes; NodeN++) {
            int NodeId = FullNodeIdV[NodeN];
            if (Timestamps.IsKey(NodeId) && Sample.IsKey(NodeId) && !Sample.GetDat(NodeId).Empty() && Timestamps.GetDat(NodeId) == 0) {
                NodeIdV.Add(NodeId);
            }
        }
    } else {
        int Nodes = NodeNmV.Len();
        for (int NodeN = 0; NodeN < Nodes; NodeN++) {
            if (!NodeNmIdH.IsKey(NodeNmV[NodeN])) { continue; }
            int NodeId = NodeNmIdH.GetDat(NodeNmV[NodeN]);
            if (Timestamps.IsKey(NodeId) && Sample.IsKey(NodeId) && !Sample.GetDat(NodeId).Empty() && Timestamps.GetDat(NodeId) == 0) {
                NodeIdV.Add(NodeId);
            }
        }
    }
    EAssertR(QuantileV.Len() > 0, "TGraphCascade::GetPosterior quantiles should not be empty!");
    for (int QuantileN = 0; QuantileN < QuantileV.Len(); QuantileN++) {
        EAssertR((QuantileV[QuantileN] >= 0.0) && (QuantileV[QuantileN] <= 1.0), "TGraphCascade::GetPosterior quantiles should be between 0.0 and 1.0");
    }

    int Nodes = NodeIdV.Len();
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        int NodeId = NodeIdV[NodeN];
        TStr NodeNm = NodeIdNmH.GetDat(NodeId);
        int Quantiles = QuantileV.Len();
        TUInt64V SampleV = Sample.GetDat(NodeId);
        SampleV.Sort(true);
        int SampleSize = SampleV.Len();
        PJsonVal QuantilesArr = TJsonVal::NewArr();
        for (int QuantileN = 0; QuantileN < Quantiles; QuantileN++) {
            int Idx = (int)floor(QuantileV[QuantileN] * SampleSize);
            Idx = MIN(Idx, SampleSize - 1);
            uint64 UnixTimestamp = TTm::GetUnixMSecsFromWinMSecs(SampleV[Idx]);
            QuantilesArr->AddToArr((double)UnixTimestamp);
        }
        Result->AddToObj(NodeNm, QuantilesArr);
    }
    return Result;
}