Exemplo n.º 1
0
void PlotSngValDistr(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr) {
  const int NBuckets = 50;
  TFltV SngValV;
  for (int f = 1; SngValV.Empty() && f < 4; f++) {
    TSnap::GetSngVals(Graph, f*SngVals, SngValV);
  }
  SngValV.Sort(true);
  THash<TFlt, TFlt> BucketCntH;
  double Step = (SngValV.Last()-SngValV[0]) / double(NBuckets-1);
  for (int i = 0; i < NBuckets; i++) {
    BucketCntH.AddDat(SngValV[0]+Step*(i+0.5), 0);
  }
  for (int i = 0; i < SngValV.Len(); i++) {
    const int Bucket = (int) floor((SngValV[i]-SngValV[0]) / Step);
    BucketCntH[Bucket] += 1;
  }
  TFltPrV EigCntV;
  BucketCntH.GetKeyDatPrV(EigCntV);
  if (DescStr.Empty()) { DescStr = FNmPref; }
  TGnuPlot::PlotValV(EigCntV, "sngDistr."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f", DescStr.CStr(),
    Graph->GetNodes(), Graph->GetEdges(), SngValV.Last().Val), "Singular value", "Count", gpsAuto, false, gpwLinesPoints);
}
Exemplo n.º 2
0
double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) {
    int len = AttrV.Len();
    double res = 0;
    if (len < NewTheta.Len()) {
        res = NewTheta.Last();    //if feature vector is shorter, add an intercept
    }
    for (int i = 0; i < len; i++) {
        if (i < NewTheta.Len()) {
            res += AttrV[i] * NewTheta[i];
        }
    }
    double mu = 1 / (1 + exp(-res));
    return mu;
}
Exemplo n.º 3
0
/// estimate number of communities using AGM
int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
    TRnd Rnd(RndSeed);
    int LambdaIter = 100;
    if (Graph->GetNodes() < 200) {
        LambdaIter = 1;
    }
    if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) {
        LambdaIter = 100;
    }

    //Find coms with large C
    TAGMFit AGMFitM(Graph, InitComs, RndSeed);
    if (PNoCom > 0.0) {
        AGMFitM.SetPNoCom(PNoCom);
    }
    AGMFitM.RunMCMC(MaxIter, LambdaIter, "");

    int TE = Graph->GetEdges();
    TFltV RegV;
    RegV.Add(0.3 * TE);
    for (int r = 0; r < 25; r++) {
        RegV.Add(RegV.Last() * RegGap);
    }
    TFltPrV RegComsV, RegLV, RegBICV;
    TFltV LV, BICV;
    //record likelihood and number of communities with nonzero P_c
    for (int r = 0; r < RegV.Len(); r++) {
        double RegCoef = RegV[r];
        AGMFitM.SetRegCoef(RegCoef);
        AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
        AGMFitM.SetRegCoef(0.0);

        TVec<TIntV> EstCmtyVV;
        AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
        int NumLowQ = EstCmtyVV.Len();
        RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));

        if (EstCmtyVV.Len() > 0) {
            TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
            AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
            double CurL = AFTemp.Likelihood();
            LV.Add(CurL);
            BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
        }
        else {
            break;
        }
    }
    // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
    if (LV.Len() == 0) {
        return 2;
    }
    else if (LV[0] == LV.Last()) {
        return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2);
    }


    //normalize likelihood and BIC to 0~100
    int MaxL = 100;
    {
        TFltV& ValueV = LV;
        TFltPrV& RegValueV = RegLV;
        double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
        for (int l = 0; l < ValueV.Len(); l++) {
            if (ValueV[l] < MinValue) {
                MinValue = ValueV[l];
            }
            if (ValueV[l] > MaxValue) {
                MaxValue = ValueV[l];
            }
        }
        while (ValueV.Len() < RegV.Len()) {
            ValueV.Add(MinValue);
        }
        double RangeVal = MaxValue - MinValue;
        for (int l = 0; l < ValueV.Len(); l++) {
            RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
        }

    }
    {
        TFltV& ValueV = BICV;
        TFltPrV& RegValueV = RegBICV;
        double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
        for (int l = 0; l < ValueV.Len(); l++) {
            if (ValueV[l] < MinValue) {
                MinValue = ValueV[l];
            }
            if (ValueV[l] > MaxValue) {
                MaxValue = ValueV[l];
            }
        }
        while (ValueV.Len() < RegV.Len()) {
            ValueV.Add(MaxValue);
        }
        double RangeVal = MaxValue - MinValue;
        for (int l = 0; l < ValueV.Len(); l++) {
            RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
        }
    }

    //fit logistic regression to normalized likelihood.
    TVec<TFltV> XV(RegLV.Len());
    TFltV YV (RegLV.Len());
    for (int l = 0; l < RegLV.Len(); l++) {
        XV[l] = TFltV::GetV(log(RegLV[l].Val1));
        YV[l] = RegLV[l].Val2 / (double) MaxL;
    }
    TFltPrV LRVScaled, LRV;
    TLogRegFit LRFit;
    PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
    for (int l = 0; l < RegLV.Len(); l++) {
        LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
        LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
    }

    //estimate # communities from fitted logistic regression
    int NumComs = 0, IdxRegDrop = 0;
    double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
    double LeftReg = 0.0, RightReg = 0.0;
    TFltV Theta;
    LRMd->GetTheta(Theta);
    RegDrop = (- Theta[1] - LRThres) / Theta[0];
    if (RegDrop <= XV[0][0]) {
        NumComs = (int) RegComsV[0].Val2;
    }
    else if (RegDrop >= XV.Last()[0]) {
        NumComs = (int) RegComsV.Last().Val2;
    }
    else {  //interpolate for RegDrop
        for (int i = 0; i < XV.Len(); i++) {
            if (XV[i][0] > RegDrop) {
                IdxRegDrop = i;
                break;
            }
        }

        if (IdxRegDrop == 0) {
            printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
            for (int l = 0; l < RegLV.Len(); l++) {
                printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
            }
        }
        IAssert(IdxRegDrop > 0);
        LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
        RightReg = XV[IdxRegDrop][0] - RegDrop;
        NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));

    }
    //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
    printf("Num Coms:%d\n", NumComs);
    if (NumComs < 2) {
        NumComs = 2;
    }

    if (PltFPrx.Len() > 0) {
        TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
        TGnuPlot GPC(PltFPrx + ".l");
        GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
        GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
        GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
        GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
        GPC.SetScale(gpsLog10X);
        GPC.SetTitle(PlotTitle);
        GPC.SavePng(PltFPrx + ".l.png");
    }

    return NumComs;
}