/// Fills SortedNIdV with the node ids of Graph in topological order
/// (every node appears before the nodes its out-edges point to).
///
/// The traversal is an iterative depth-first search driven by an explicit
/// stack. Nodes are appended to the result when all of their out-neighbours
/// have been handled, and the result is reversed at the very end.
///
/// Raises through EAssertR when an out-edge leads to a node that is still on
/// the current DFS path, i.e. when the graph is not a DAG.
///
/// @param SortedNIdV  output vector; re-generated (length 0) on entry.
void TGraphCascade::TopologicalSort(TIntV& SortedNIdV) {
    int NodeCount = Graph.GetNodes();
    SortedNIdV.Gen(NodeCount, 0); // reserve room for the result, length 0

    // per-node bookkeeping, keyed by node id
    THash<TInt, TBool> Visited(NodeCount);  // node was seen on top of the stack at least once
    THash<TInt, TBool> OnPath(NodeCount);   // node is expanded but not yet finished
    THash<TInt, TBool> Emitted(NodeCount);  // node was already appended to the result

    TIntV AllNIdV;
    Graph.GetNIdV(AllNIdV); // all node ids

    // initialise all flags to false
    for (int Idx = 0; Idx < NodeCount; Idx++) {
        int NId = AllNIdV[Idx];
        Visited.AddDat(NId, false);
        OnPath.AddDat(NId, false);
        Emitted.AddDat(NId, false);
    }

    TSStack<TInt> Pending;
    for (int Idx = 0; Idx < NodeCount; Idx++) {
        int RootId = AllNIdV[Idx];
        // only start a traversal from nodes that were not reached yet
        if (Visited.GetDat(RootId)) { continue; }

        Pending.Push(RootId);
        while (!Pending.Empty()) {
            // expand the node currently on top of the stack
            int CurId = Pending.Top();
            Visited.GetDat(CurId) = true;
            OnPath.GetDat(CurId) = true;

            TNGraph::TNodeI CurNI = Graph.GetNI(CurId);
            int OutDeg = CurNI.GetOutDeg();
            bool HasUnvisitedChild = false;
            for (int OutN = 0; OutN < OutDeg; OutN++) {
                int ChildId = CurNI.GetOutNId(OutN);
                // a child that is still on the active path closes a cycle
                EAssertR(!OnPath.GetDat(ChildId), "TGraphCascade::TopologicalSort: the graph is not a DAG!");
                if (Visited.GetDat(ChildId)) { continue; }
                // unvisited child: it has to be finished before the current node
                HasUnvisitedChild = true;
                Pending.Push(ChildId);
            }

            // children were pushed above; come back to this node later
            if (HasUnvisitedChild) { continue; }

            // all children are done: emit the node (once) and leave the path;
            // the same id can sit on the stack several times, hence the Emitted check
            if (!Emitted.GetDat(CurId)) {
                SortedNIdV.Add(CurId);
                Emitted.GetDat(CurId) = true;
            }
            OnPath.GetDat(CurId) = false;
            Pending.Pop();
        }
    }

    // nodes were collected in finishing order; flip to get the topological order
    SortedNIdV.Reverse();
}
void TSkyGridEnt::GetEntClustV(const TSkyGridBs* SkyGridBs, const uint64& MnTm, const int& MnDocs, const int& MxDocs, const int& Clusts, TVec<TStrFltPrV>& EntNmWgtPrVV) const { EntNmWgtPrVV.Clr(); // create bow PBowDocBs BowDocBs=TBowDocBs::New(); // collect documents TIntV DocIdV; GetDocIdV(SkyGridBs, MnTm, 0, DocIdV); DocIdV.Reverse(); DocIdV.Shuffle(TRnd(1)); DocIdV.Trunc(MxDocs); if (DocIdV.Len()<MnDocs){return;} for (int DocN=0; DocN<DocIdV.Len(); DocN++){ int DocId=DocIdV[DocN]; PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId); // create vector of entity-weights TIntFltPrV WIdWgtPrV; for (int EntN=0; EntN<Doc->GetEnts(); EntN++){ int EntId; int EntFq; Doc->GetEntNmFq(EntN, EntId, EntFq); TStr EntNm=SkyGridBs->GetEntNm(EntId); int EntWId=BowDocBs->AddWordStr(EntNm); WIdWgtPrV.Add(TIntFltPr(EntWId, EntFq)); } // create bow-document int DId=BowDocBs->AddDoc(TInt::GetStr(DocId), TStrV(), WIdWgtPrV); TStr DocDescStr=Doc->GetTitleStr(); BowDocBs->PutDocDescStr(DId, DocDescStr); } // k-means clustering PBowSim BowSim=TBowSim::New(bstCos); // similarity object TBowWordWgtType WordWgtType=bwwtNrmTFIDF; // define weighting PBowDocPart BowDocPart=TBowClust::GetKMeansPart( TNotify::StdNotify, // log output BowDocBs, // document data BowSim, // similarity function TRnd(1), // random generator Clusts, // number of clusters 1, // trials per k-means 1, // convergence epsilon for k-means 1, // min. documents per cluster WordWgtType, // word weighting 0, // cut-word-weights percentage 0); // minimal word frequency EntNmWgtPrVV.Clr(); for (int ClustN=0; ClustN<BowDocPart->GetClusts(); ClustN++){ PBowDocPartClust Clust=BowDocPart->GetClust(ClustN); TStrFltPrV WordStrWgtPrV; Clust->GetTopWordStrWgtPrV(BowDocBs, 25, 0.5, WordStrWgtPrV); EntNmWgtPrVV.Add(WordStrWgtPrV); } //BowDocPart->SaveTxt("Clusts.Txt", BowDocBs, true, 25, 0.5, false); }