///////////////////////////////////////////////// // Top2 Friends network void TTop2FriendNet::SetTop2() { Top2NIdH.Gen(Net->GetNodes()); TFltIntPrV WgtNIdV; for (TWgtNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) { WgtNIdV.Clr(false); for (int e = 0; e < NI.GetOutDeg(); e++) { WgtNIdV.Add(TFltIntPr(NI.GetOutEDat(e), NI.GetOutNId(e))); } WgtNIdV.Shuffle(TInt::Rnd); // so that ties are broken randomly WgtNIdV.Sort(false); if (WgtNIdV.Len() == 0) { Top2NIdH.AddDat(NI.GetId(), TIntPr(-1, -1)); } else if (WgtNIdV.Len() == 1) { Top2NIdH.AddDat(NI.GetId(), TIntPr(WgtNIdV[0].Val2, -1)); } else if (WgtNIdV.Len() >= 2) { Top2NIdH.AddDat(NI.GetId(), TIntPr(WgtNIdV[0].Val2, WgtNIdV[1].Val2)); } } // create union find structure PNGraph Top1Net = GetTop1Net(); Top1UF = TUnionFind(Top1Net->GetNodes()); TCnComV CnComV; TCnCom::GetWccs(Top1Net, CnComV); for (TWgtNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) { Top1UF.Add(NI.GetId()); } for (int c = 0; c < CnComV.Len(); c++) { for (int i = 1; i < CnComV[c].Len(); i++) { Top1UF.Union(CnComV[c][0], CnComV[c][i]); } } }
// to get first few eigenvectors void GetEigVec(const PUNGraph& Graph, const int& EigVecs, TFltV& EigValV, TVec<TFltV>& EigVecV) { const int Nodes = Graph->GetNodes(); // Lanczos TUNGraphMtx GraphMtx(Graph); int CalcVals = int(2*EigVecs); if (CalcVals > Nodes) { CalcVals = Nodes; } TFltVV EigVecVV; //while (EigValV.Len() < EigVecs && CalcVals < 10*EigVecs) { try { TSparseSVD::Lanczos(GraphMtx, EigVecs, 2*EigVecs, ssotFull, EigValV, EigVecVV, false); } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } if (EigValV.Len() < EigVecs) { printf(" ***TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } // CalcVals += EigVecs; //} TFltIntPrV EigValIdV; for (int i = 0; i < EigValV.Len(); i++) { EigValIdV.Add(TFltIntPr(EigValV[i], i)); } EigValIdV.Sort(false); EigValV.Sort(false); for (int v = 0; v < EigValIdV.Len(); v++) { // vector components are not sorted!!! EigVecV.Add(); EigVecVV.GetCol(EigValIdV[v].Val2, EigVecV.Last()); } IsAllValVNeg(EigVecV[0], true); }
void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) { const int Nodes = Graph->GetNodes(); SngValV.Clr(); LeftSV.Clr(); RightSV.Clr(); TFltVV LSingV, RSingV; if (Nodes < 100) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TIntH NodeIdH; // create adjecency matrix (1-based) for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } } else { // Lanczos TNGraphMtx GraphMtx(Graph); TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV); //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt"); //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX } TFltIntPrV SngValIdV; for (int i = 0; i < SngValV.Len(); i++) { SngValIdV.Add(TFltIntPr(SngValV[i], i)); } SngValIdV.Sort(false); SngValV.Sort(false); for (int v = 0; v < SngValIdV.Len(); v++) { LeftSV.Add(); LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last()); RightSV.Add(); RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last()); } IsAllValVNeg(LeftSV[0], true); IsAllValVNeg(RightSV[0], true); }
bool TDecisionTree::TNode::CanSplitNumFtr(const TFltIntPrV& ValClassPrV, const int& TotalPos, double& CutVal, double& Score) const { const int NInst = ValClassPrV.Len(); const int MxCutN = NInst-1; Score = TFlt::NInf; int PosS0 = 0; // the number of positive instances in the left set int CutN = 0; while (CutN < MxCutN) { const TFltIntPr& FtrValClassPr = ValClassPrV[CutN]; const double& CurrVal = FtrValClassPr.Val1; const int& CurrClass = FtrValClassPr.Val2; PosS0 += CurrClass; // the cut point always occurs on the boundary between two classes // so if the class doesn't change there is not need to check if (CurrClass != ValClassPrV[CutN+1].Val2) { // if the values of the attribute are the same then move // to where they first change since that is where the cut will // actually be performed while (CutN < MxCutN && ValClassPrV[CutN+1].Val1 == CurrVal) { CutN++; PosS0 += ValClassPrV[CutN].Val2; } const int S0Len = CutN + 1; const int S1Len = NInst - S0Len; const int PosS1 = TotalPos - PosS0; const double CurrScore = Tree->GetSplitScore(S0Len, S1Len, PosS0, PosS1); if (CurrScore > Score) { Score = CurrScore; CutVal = (CurrVal + ValClassPrV[CutN+1].Val1) / 2; } } CutN++; } return Score != TFlt::NInf; }