Ejemplo n.º 1
0
void TGraphCascade::TopologicalSort(TIntV& SortedNIdV) {
    int Nodes = Graph.GetNodes();
        
    SortedNIdV.Gen(Nodes, 0); // result
    THash<TInt, TBool> Marks(Nodes); // nodeid -> mark map
    THash<TInt,TBool> TempMarks(Nodes); // nodeid -> temp mark map
    THash<TInt, TBool> Added(Nodes);
    TIntV NIdV;  Graph.GetNIdV(NIdV); // all node ids

    // set marks
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        int NodeId = NIdV[NodeN];
        Marks.AddDat(NodeId, false);
        TempMarks.AddDat(NodeId, false);
        Added.AddDat(NodeId, false);
    }

    TSStack<TInt> Stack;
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        int NodeId = NIdV[NodeN];
        // select an unmarked node
        if (!Marks.GetDat(NodeId)) {
            Stack.Push(NodeId);
            while (!Stack.Empty()) {
                // visit TopNode
                int TopNodeId = Stack.Top();
                Marks.GetDat(TopNodeId) = true;
                TempMarks.GetDat(TopNodeId) = true;
                // add children, set their temp marks to true
                TNGraph::TNodeI NI = Graph.GetNI(TopNodeId);
                int Children = NI.GetOutDeg();
                bool IsFinal = true;
                for (int ChildN = 0; ChildN < Children; ChildN++) {
                    int ChildId = NI.GetOutNId(ChildN);
                    EAssertR(!TempMarks.GetDat(ChildId), "TGraphCascade::TopologicalSort: the graph is not a DAG!");
                    if (!Marks.GetDat(ChildId)) {
                        // unvisited node
                        IsFinal = false;
                        Stack.Push(ChildId);
                    }
                }
                if (IsFinal) {
                    // push TopNode to tail
                    if (!Added.GetDat(TopNodeId)) {
                        SortedNIdV.Add(TopNodeId);
                        Added.GetDat(TopNodeId) = true;
                    }
                    TempMarks.GetDat(TopNodeId) = false;
                    Stack.Pop();
                }
            }
        }
    }
    SortedNIdV.Reverse();
}
Ejemplo n.º 2
0
void TSkyGridEnt::GetEntClustV(const TSkyGridBs* SkyGridBs,
 const uint64& MnTm, const int& MnDocs, const int& MxDocs, const int& Clusts,
 TVec<TStrFltPrV>& EntNmWgtPrVV) const {
  EntNmWgtPrVV.Clr();
  // create bow
  PBowDocBs BowDocBs=TBowDocBs::New();
  // collect documents
  TIntV DocIdV; GetDocIdV(SkyGridBs, MnTm, 0, DocIdV);
  DocIdV.Reverse(); DocIdV.Shuffle(TRnd(1)); DocIdV.Trunc(MxDocs);
  if (DocIdV.Len()<MnDocs){return;}
  for (int DocN=0; DocN<DocIdV.Len(); DocN++){
    int DocId=DocIdV[DocN];
    PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId);
    // create vector of entity-weights
    TIntFltPrV WIdWgtPrV;
    for (int EntN=0; EntN<Doc->GetEnts(); EntN++){
      int EntId; int EntFq; Doc->GetEntNmFq(EntN, EntId, EntFq);
      TStr EntNm=SkyGridBs->GetEntNm(EntId);
      int EntWId=BowDocBs->AddWordStr(EntNm);
      WIdWgtPrV.Add(TIntFltPr(EntWId, EntFq));
    }
    // create bow-document
    int DId=BowDocBs->AddDoc(TInt::GetStr(DocId), TStrV(), WIdWgtPrV);
    TStr DocDescStr=Doc->GetTitleStr();
    BowDocBs->PutDocDescStr(DId, DocDescStr);
  }
  // k-means clustering
  PBowSim BowSim=TBowSim::New(bstCos); // similarity object
  TBowWordWgtType WordWgtType=bwwtNrmTFIDF; // define weighting
  PBowDocPart BowDocPart=TBowClust::GetKMeansPart(
   TNotify::StdNotify, // log output
   BowDocBs, // document data
   BowSim, // similarity function
   TRnd(1), // random generator
   Clusts, // number of clusters
   1, // trials per k-means
   1, // convergence epsilon for k-means
   1, // min. documents per cluster
   WordWgtType, // word weighting
   0, // cut-word-weights percentage
   0); // minimal word frequency
  EntNmWgtPrVV.Clr();
  for (int ClustN=0; ClustN<BowDocPart->GetClusts(); ClustN++){
    PBowDocPartClust Clust=BowDocPart->GetClust(ClustN);
    TStrFltPrV WordStrWgtPrV;
    Clust->GetTopWordStrWgtPrV(BowDocBs, 25, 0.5, WordStrWgtPrV);
    EntNmWgtPrVV.Add(WordStrWgtPrV);
  }
  //BowDocPart->SaveTxt("Clusts.Txt", BowDocBs, true, 25, 0.5, false);
}