int main(int argc, char *argv[]) {
  // #### SETUP: Parse Arguments
  LogOutput Log;
  THash<TStr, TStr> Arguments;
  ArgumentParser::ParseArguments(argc, argv, Arguments, Log);

  TStr OutputDirectory;
  TStr StartString = ArgumentParser::GetArgument(Arguments, "start", "2009-02-01");
  TStr QBDBDirectory = ArgumentParser::GetArgument(Arguments, "qbdb", QBDB_DIR_DEFAULT);
  TStr OutDirectory = ArgumentParser::GetArgument(Arguments, "out", "/lfs/1/tmp/curis/");
  TInt WindowSize = ArgumentParser::GetArgument(Arguments, "window", "14").GetInt();

  if (ArgumentParser::GetArgument(Arguments, "nolog", "") == "") {
    Log.DisableLogging();
  } else if (!Arguments.IsKeyGetDat("directory", OutputDirectory)) {
    Log.SetupNewOutputDirectory("");
  } else {
    Log.SetDirectory(OutputDirectory);
  }

  // #### DATA LOADING: Load ALL the things!
  TQuoteBase QB;
  TDocBase DB;
  fprintf(stderr, "Loading QB and DB from file for %d days, starting from %s...\n", WindowSize.Val, StartString.CStr());
  Err("%s\n", QBDBDirectory.CStr());
  TSecTm PresentTime = TDataLoader::LoadQBDBByWindow(QBDBDirectory, StartString, WindowSize, QB, DB);
  fprintf(stderr, "QBDB successfully loaded!\n");

  TVec<TSecTm> PubTmV;
  TVec<TStr> PostUrlV;
  TVec<TStr> QuoteV;

  fprintf(stderr, "Dumping quotes to file...\n");
  TIntV QuoteIds;
  QB.GetAllQuoteIds(QuoteIds);
  for (int i = 0; i < QuoteIds.Len(); i++) {
    TQuote Q;
    QB.GetQuote(QuoteIds[i], Q);
    TStr QContentString;
    Q.GetContentString(QContentString);

    TVec<TUInt> Sources;
    Q.GetSources(Sources);
    for (int j = 0; j < Sources.Len(); j++) {
      TDoc D;
      DB.GetDoc(Sources[j], D);
      TStr PostUrl;
      D.GetUrl(PostUrl);
      TSecTm PostTime = D.GetDate();
      QuoteV.Add(QContentString);
      PubTmV.Add(PostTime);
      PostUrlV.Add(PostUrl);
    }
  }

  TFOut FOut(OutDirectory + "QuoteList" + ".bin");
  PubTmV.Save(FOut);
  PostUrlV.Save(FOut);
  QuoteV.Save(FOut);

  fprintf(stderr, "Done!\n");
  return 0;
}
Exemple #2
0
	TQmParam(const TStr& FNm) {
		EAssertR(TFile::Exists(FNm), "Missing configuration file " + FNm);
		// load configuration file
		PJsonVal ConfigVal = TJsonVal::GetValFromSIn(TFIn::New(FNm));
		EAssertR(ConfigVal->IsObj(), "Invalid setting file - not valid JSON");
		// parse out common stuff
		RootFPath = TStr::GetNrFPath(ConfigVal->GetObjStr("directory", TDir::GetCurDir()));
		LockFNm = RootFPath + "./lock";
		DbFPath = ConfigVal->GetObjStr("database", "./db/");
		PortN = TFlt::Round(ConfigVal->GetObjNum("port"));
		// parse out unicode definition file
		TStr UnicodeFNm = ConfigVal->GetObjStr("unicode", TQm::TEnv::QMinerFPath + "./UnicodeDef.Bin");
		if (!TUnicodeDef::IsDef()) { TUnicodeDef::Load(UnicodeFNm); }

		// parse cache
		if (ConfigVal->IsObjKey("cache")) { 
			PJsonVal CacheVal = ConfigVal->GetObjKey("cache");
			// parse out index and default store cache sizes
			IndexCacheSize = int64(CacheVal->GetObjNum("index", 1024)) * int64(TInt::Mega);
			StoreCacheSize = int64(CacheVal->GetObjNum("store", 1024)) * int64(TInt::Mega);
			// prase out store specific sizes, when available
			if (CacheVal->IsObjKey("stores")) {
				PJsonVal StoreCacheVals = CacheVal->GetObjKey("stores");
				for (int StoreN = 0; StoreN < StoreCacheVals->GetArrVals(); StoreN++) {
					PJsonVal StoreCacheVal = StoreCacheVals->GetArrVal(StoreN);					
					TStr StoreName = StoreCacheVal->GetObjStr("name");
					uint64 StoreCacheSize = int64(StoreCacheVal->GetObjNum("size")) * int64(TInt::Mega);
					StoreCacheSizes.AddDat(StoreName, StoreCacheSize);
				}
			}
		} else {
			// default sizes are set to 1GB for index and stores			
			IndexCacheSize = int64(1024) * int64(TInt::Mega);
			StoreCacheSize = int64(1024) * int64(TInt::Mega);
		}

		// load scripts
		if (ConfigVal->IsObjKey("script")) {
			// we have configuration file, read it
			PJsonVal JsVals = ConfigVal->GetObjKey("script");
			if (JsVals->IsArr()) {
				for (int JsValN = 0; JsValN < JsVals->GetArrVals(); JsValN++) {
					JsParamV.Add(TJsParam(RootFPath, JsVals->GetArrVal(JsValN)));
				}
			} else {
				JsParamV.Add(TJsParam(RootFPath, JsVals));
			}
		} else {
			// no settings for scripts, assume default setting
			TStr SrcFPath = TStr::GetNrAbsFPath("src", RootFPath);
			TFFile File(SrcFPath, ".js", false); TStr SrcFNm;
			while (File.Next(SrcFNm)) {
				JsParamV.Add(TJsParam(RootFPath, SrcFNm));
			}
		}

		// load serving folders
		//TODO: Add to qm config ability to edit this
		if (ConfigVal->IsObjKey("wwwroot")) {
			PJsonVal WwwVals = ConfigVal->GetObjKey("wwwroot");
			if (WwwVals->IsArr()) {
				for (int WwwValN = 0; WwwValN < WwwVals->GetArrVals(); WwwValN++) {
					AddWwwRoot(WwwVals->GetArrVal(WwwValN));
				}
			} else {
				AddWwwRoot(WwwVals);
			}			
		}
		// check for folder with admin GUI
		TStr GuiFPath = TStr::GetNrAbsFPath("gui", TQm::TEnv::QMinerFPath);
		if (TDir::Exists(GuiFPath)) {
			WwwRootV.Add(TStrPr("admin", GuiFPath));
		}
	}
Exemple #3
0
/// Converts a bag-of-words file into a bag-of-word-weights file.
///
/// The weighting method is chosen with -type: ("load" or "svm"). Any other
/// value is reported and the program exits with status 1 without saving.
/// Returns 0 on success and 1 when an exception reaches the Catch handler.
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // get command line parameters
  Env.PrepArgs("Bag-Of-Words To Bag-Of-Word-Weights using precalculated weights");
  TStr InBowFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-BagOfWords-FileName");
  TStr OutBowwFNm=Env.GetIfArgPrefixStr("-o:", "", "Output-BagOfWordWeights-FileName");
  TStr Type=Env.GetIfArgPrefixStr("-type:", "", "Method-Type (load, svm)");
  TStr InWgtFNm=Env.GetIfArgPrefixStr("-iwgt:", "", "Input-Matlab-WordWeights-FileName");
  double SvmC=Env.GetIfArgPrefixFlt("-svmcost:", 1.0, "Svm-Cost-Parameter");
  int SvmTime=Env.GetIfArgPrefixInt("-svmtime:", 60, "Max-Time-per-Model (in seconds)");
  bool PutUnitNorm=Env.GetIfArgPrefixBool("-unitnorm:", false, "Normalize-Document-Vectors");
  double CutWordWgtSumPrc=Env.GetIfArgPrefixFlt("-cutww:", 0.0, "Cut-Word-Weight-Sum-Percentage");
  int MnWordFq=Env.GetIfArgPrefixInt("-mnwfq:", 0, "Minimal-Word-Frequency");
  if (Env.IsEndOfRun()){return 0;}

  // load bow data
  printf("Loading bag-of-words data from '%s' ...", InBowFNm.CStr());
  PBowDocBs BowDocBs=TBowDocBs::LoadBin(InBowFNm);
  printf(" Done.\n");

  PBowDocWgtBs BowDocWgtBs;
  if (Type == "load") {
      // loading word weights
      printf("Loading word-weights data from '%s' ...", InWgtFNm.CStr());
      TVec<TFltV> WordWgtVV; 
      // NOTE: the loader call is commented out, so this branch currently
      // stops at Fail before any weights are read.
      Fail; //TLAMisc::LoadTFltVV(InWgtFNm, WordWgtVV);
      IAssert(WordWgtVV.Len() == 1);
      printf(" Done.\n");

      // positive weights are replaced by their square root
      TFltV& WordWgtV = WordWgtVV[0];
      for (int WgtN = 0; WgtN < WordWgtV.Len(); WgtN++) {
          if (WordWgtV[WgtN] > 0.0) {
              WordWgtV[WgtN] = sqrt(WordWgtV[WgtN]);
          }
      }

      // calculate boww data
      printf("Calculating bag-of-word-weights data ...");
      BowDocWgtBs = TBowDocWgtBs::NewPreCalcWgt(BowDocBs, 
          WordWgtV, PutUnitNorm, CutWordWgtSumPrc, MnWordFq);
      printf(" Done.\n"); 
  } else if (Type == "svm") {
      printf("Calculating bag-of-word-weights data ... \n");
      PBowDocWgtBs TfidfWgtBs = TBowDocWgtBs::New(BowDocBs, 
          bwwtLogDFNrmTFIDF, CutWordWgtSumPrc, MnWordFq);
      BowDocWgtBs = TBowDocWgtBs::NewSvmWgt(BowDocBs, TfidfWgtBs, 
          TIntV(), SvmC, SvmTime, false, TIntV(), PutUnitNorm, 
          CutWordWgtSumPrc, MnWordFq);
      printf("Done.\n"); 

  } else {
      // No weights were computed, so BowDocWgtBs is still unset; falling
      // through to the save step would dereference it. Stop here instead.
      printf("Wrong method type!\n");
      return 1;
  }

  // save boww data
  if (!OutBowwFNm.Empty()){
    TStr::PutFExtIfEmpty(OutBowwFNm, ".Boww");
    printf("Saving bag-of-word-weights data to '%s' ...", OutBowwFNm.CStr());
    BowDocWgtBs->SaveBin(OutBowwFNm);
    BowDocWgtBs->SaveTxtStat(OutBowwFNm + ".txt", BowDocBs, true, true, true);
    printf(" Done.\n");
  }

  return 0;
  Catch;
  return 1;
}
void ComputeMissingProperties (const TStr &Dir, const TStr &TriplesFilename)
{
  // Parse the rdf file and create the graph.
  TFIn File(TriplesFilename);
  TRDFParser DBpediaDataset(File);

  printf("Creating graph from input file...\n");
  TGraph G;
  TStrSet NodeStrs;
  TStrSet PropStrs;
  bool Parsed = TSnap::GetGraphFromRDFParser(DBpediaDataset, G, NodeStrs, PropStrs);
  if (!Parsed) {
    return;
  }

  // Store the graph and associated data
  G.Save(*TFOut::New(Dir + "graph.bin"));
  NodeStrs.Save(*TFOut::New(Dir + "nodeStrs.bin"));
  PropStrs.Save(*TFOut::New(Dir + "propStrs.bin"));

  printf("Computing objects...\n");
  // Get the objects of the graph. 
  TIntV Objects;
  // We defined the objects to be the nodes with prefix http://dbpedia.org/resource/. 
  TObjectFunctor ObjectFunctor(NodeStrs);
  TObjectUtils::GetObjects(G, ObjectFunctor, Objects);
  // Store and print the objects.
  Objects.Save(*TFOut::New(Dir + "objects.bin"));
  TObjectUtils::PrintObjects(Objects, NodeStrs, *TFOut::New(Dir + "objects.txt"));

  printf("Computing object matrix...\n");
  // Here we choose the descriptors for the objects.
  // We chose property + nbh (value) descriptors for objects
  // We could also use more complicated descriptors such as subgraphs or subnetworks.
  TSparseColMatrix ObjectMatrix1;
  TSparseColMatrix ObjectMatrix2;
  TObjectUtils::GetPropertyCount(Objects, G, ObjectMatrix1);
  TObjectUtils::GetNbhCount(Objects, G, ObjectMatrix2);
  TLAUtils::NormalizeMatrix(ObjectMatrix1);
  TLAUtils::NormalizeMatrix(ObjectMatrix2);

  TSparseColMatrix ObjectMatrix;
  TLAUtils::ConcatenateMatricesRowWise(ObjectMatrix1, ObjectMatrix2, ObjectMatrix);
  TLAUtils::NormalizeMatrix(ObjectMatrix);
  ObjectMatrix.Save(*TFOut::New(Dir + "objectMatrix.bin"));

  printf("Clustering objects...\n");
  // Partition the objects into 64 partitions (clusters).
  int K = 64;
  int NumIterations = 20;
  TIntV Assigments;
  TVec<TIntV> Clusters;
  TClusterUtils::GetClusters(ObjectMatrix, K, NumIterations, Assigments, Clusters);
  // Store the clustering data.
  Assigments.Save(*TFOut::New(Dir + "assigments.bin"));
  Clusters.Save(*TFOut::New(Dir + "clusters.bin"));
  // Print some details about the clusters.
  TClusterUtils::PrintClusterSizes(Clusters, *TFOut::New(Dir + "clusterSizes.txt"));
  TClusterUtils::PrintClusters(Clusters, Objects, NodeStrs, *TFOut::New(Dir + "clusters.txt"));

  printf("Computing similarities...\n");
  // Compute the similarity betweeen the objects.
  const int MaxNumSimilarObjects = 100;
  const int NumThreads = 10;
  TVec<TIntFltKdV> Similarities;
  TSimilarityUtils::ComputeSimilarities(ObjectMatrix, Assigments, Clusters, MaxNumSimilarObjects, NumThreads, Similarities);
  // Store the object similarities.
  Similarities.Save(*TFOut::New(Dir + "objectSimilarities.bin"));
  // Print the object similarities.
  TSimilarityUtils::PrintSimilarities(Similarities, Objects, NodeStrs, 10, *TFOut::New(Dir + "objectSimilarities.txt"));

  printf("Computing existing property matrix...\n");
  // Our goal is to compute the missing out-going properties.
  // Therefore, we create the matrix of existing out-going properties of the objects.
  TSparseColMatrix OutPropertyCountMatrix;
  TObjectUtils::GetOutPropertyCount(Objects, G, OutPropertyCountMatrix);
  TObjectUtils::PrintPropertyMatrix(OutPropertyCountMatrix, Objects, NodeStrs, PropStrs, *TFOut::New(Dir + "outPropertyCountMatrix.txt"));
  OutPropertyCountMatrix.Save(*TFOut::New(Dir + "outPropertyCountMatrix.bin"));

  printf("Computing missing properties...\n");
  // And finally, compute the missing properties.
  int MaxNumMissingProperties = 100;
  TVec<TIntFltKdV> MissingProperties;
  TPropertyUtils::GetMissingProperties(Similarities, OutPropertyCountMatrix, MaxNumMissingProperties, NumThreads, MissingProperties);
  // Store the missing properties data.
  MissingProperties.Save(*TFOut::New(Dir + "missingProperties.bin"));
  // Print missing properties.
  TPropertyUtils::PrintMissingProperties(MissingProperties, Objects, NodeStrs, PropStrs, 10, *TFOut::New(Dir + "missingProperties.txt"));
}
Exemple #5
0
/// Enumerate maximal cliques of the network on more than MinMaxCliqueSize nodes
void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
  TCliqueOverlap CO;
  MaxCliques.Clr(false);
  CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques);
}
Exemple #6
0
/// Fills SignatureBandBuckets with min-hash buckets for the quotes in QB.
///
/// One hash table per band (NumBands of them) is appended to
/// SignatureBandBuckets; the tables are then addressed by band index, so the
/// vector is expected to be empty on entry. For every quote whose parsed
/// content has fewer than WordWindow entries, each band gets a key built from
/// the BandSize per-row minimum signature values, and the quote id is added
/// to the bucket stored under that key. Quotes with WordWindow or more
/// entries are not bucketed by this function.
///
/// Every parsed-content entry of a quote is looked up in the signature table
/// computed from Shingles.
void LSH::MinHash(TQuoteBase *QB, THashSet<TMd5Sig>& Shingles,
    TVec<THash<TMd5Sig, TIntSet> >& SignatureBandBuckets) {
  Err("Creating buckets...\n");
  THash < TMd5Sig, TIntV > Signatures;
  ComputeSignatures(Shingles, Signatures, NumBands * BandSize);

  // bucket creation: one table per band
  for (int i = 0; i < NumBands; ++i) {
    SignatureBandBuckets.Add(THash<TMd5Sig, TIntSet>());
  }

  // bucket filling
  int NumShingles = Shingles.Len();
  THash<TInt, TQuote> Quotes;
  QB->GetIdToTQuotes(Quotes);

  THash<TInt, TQuote>::TIter CurI = Quotes.BegI();
  THash<TInt, TQuote>::TIter EndI = Quotes.EndI();
  TQuote Q; // SKYFALL

  for (; CurI < EndI; CurI++) {
    Q = CurI.GetDat();

    TStrV Content;
    Q.GetParsedContent(Content);
    TInt Id = Q.GetId();

    // signature for quote: one signature vector per content entry
    int ContentLen = Content.Len();
    TVec < TIntV > Signature;
    for (int i = 0; i < ContentLen; i++) {
      const TMd5Sig ShingleMd5(Content[i]);
      Signature.Add(Signatures.GetDat(ShingleMd5));
    }

    // only quotes shorter than the word window are placed in buckets
    if (ContentLen >= WordWindow) { continue; }

    // place in bucket
    for (int i = 0; i < NumBands; ++i) {
      TStr Sig;
      for (int j = 0; j < BandSize; ++j) {
        int CurSig = i * BandSize + j;

        // minimum of signature row CurSig over all content entries;
        // stays at NumShingles when the quote has no content
        TInt min = NumShingles;
        for (int k = 0; k < ContentLen; k++) {
          if (Signature[k][CurSig] < min) {
            min = Signature[k][CurSig];
          }
        }
        Sig += min.GetStr() + "-";
      }
      //Err(Sig.CStr());

      // AddDat(Key) returns the stored set (creating an empty one if needed),
      // so the id is inserted in place instead of copying the whole bucket
      // out of the table and back in on every insert.
      const TMd5Sig SigMd5(Sig);
      SignatureBandBuckets[i].AddDat(SigMd5).AddKey(Id);
    }
  }
  Err("Minhash step complete!\n");
}
/// Draws the names of the most frequent categories at the mean position of
/// their points and appends the drawn text rectangles to PointNmRectV.
///
/// At most 6 names are drawn when there are more than 100 points, otherwise
/// at most 4. Categories are visited in descending frequency; the scan stops
/// at the first category covering less than 5% of the points. A category is
/// skipped when it has no entry in CatFullNmH, when its mean position is
/// outside the zoom rectangle, when it is closer than 30% of the smaller
/// canvas dimension to an already drawn name, or when its text rectangle
/// intersects an already drawn one.
void TVizMapContext::PaintCatNms(PGks Gks, const int& KeyWdFontSize, 
        TVec<TFltRect>& PointNmRectV) {

    // per category: number of points and sum of point coordinates
    TIntH CatFqH; TIntFltPrH CatPosSumH;
    PBowDocBs BowDocBs = VizMapFrame->GetKeyWdBow();
    const int Points = VizMapFrame->GetPoints();
    for (int PointN = 0; PointN < Points; PointN++) {
        PVizMapPoint Point = VizMapFrame->GetPoint(PointN);
        const int DocId = Point->GetDocId();
        const int DocCats = BowDocBs->GetDocCIds(DocId);
        for (int DocCatN = 0; DocCatN < DocCats; DocCatN++) {
            const int CatId = BowDocBs->GetDocCId(DocId, DocCatN);
            CatFqH.AddDat(CatId)++;
            CatPosSumH.AddDat(CatId).Val1 += Point->GetPointX();
            CatPosSumH.AddDat(CatId).Val2 += Point->GetPointY();
        }
    }
    // most frequent categories first
    CatFqH.SortByDat(false);

    // drawing setup
    const int MxDrawnCats = Points > 100 ? 6 : 4;
    TFltRect ZoomRect = GetZoomRect();
    Gks->SetFont(TGksFont::New("ARIAL", KeyWdFontSize + 3, ColorCatNmFont));
    TVec<TFltRect> DrawnRectV; TVec<TFltV> DrawnPosV;
    const int MnSize = TInt::GetMn(Gks->GetWidth(), Gks->GetHeight());
    const int MnDist = TFlt::Round(0.3 * double(MnSize));
    int DrawnCats = 0;
    int KeyId = CatFqH.FFirstKeyId();
    while (CatFqH.FNextKeyId(KeyId)) {
        // stop once enough names are drawn or the category is too rare
        if (DrawnCats == MxDrawnCats) { break; }
        if (double(CatFqH[KeyId]) / double(Points) < 0.05) { break; }
        const int CatId = CatFqH.GetKey(KeyId);
        // only categories with a known full name are drawn
        TStr CatNm = BowDocBs->GetCatNm(CatId);
        if (!CatFullNmH.IsKey(CatNm)) { continue; }
        CatNm = CatFullNmH.GetDat(CatNm);
        // mean position of the category's points
        TFltPr PosSum = CatPosSumH.GetDat(CatId);
        const int CatFq = CatFqH.GetDat(CatId); IAssert(CatFq > 0);
        const double MeanX = PosSum.Val1 / double(CatFq);
        const double MeanY = PosSum.Val2 / double(CatFq);
        // skip categories centred outside the zoom
        if (!ZoomRect.IsXYIn(MeanX, MeanY)) { continue; }
        // half of the text extent on screen
        const int HalfW = Gks->GetTxtWidth(CatNm) / 2;
        const int HalfH = Gks->GetTxtHeight(CatNm) / 2;
        // screen coordinates of the centre
        const int X = GetScreenCoord(MeanX, ZoomRect.GetMnX(),
            ZoomRect.GetXLen(), Gks->GetWidth());
        const int Y = GetScreenCoord(MeanY, ZoomRect.GetMnY(),
            ZoomRect.GetYLen(), Gks->GetHeight());
        // distance to the nearest name drawn so far
        int NearestDist = MnSize;
        TFltV Pos = TFltV::GetV(double(X), double(Y));
        for (int PosN = 0; PosN < DrawnPosV.Len(); PosN++) {
            const double Dist = TLinAlg::EuclDist(DrawnPosV[PosN], Pos);
            NearestDist = TInt::GetMn(TFlt::Round(Dist), NearestDist);
        }
        if (NearestDist < MnDist) { continue; }
        // reject names whose rectangle intersects a drawn one
        TFltRect NmRect(X - HalfTxtWidthFix(HalfW), Y - HalfH,
            X + HalfW, Y + HalfH);
        bool OverlapP = false;
        const int DrawnRects = DrawnRectV.Len();
        for (int RectN = 0; RectN < DrawnRects; RectN++) {
            if (TFltRect::Intersection(NmRect, DrawnRectV[RectN])) {
                OverlapP = true; break;
            }
        }
        if (OverlapP) { continue; }
        // draw the name and remember its rectangle and position
        Gks->PutTxt(CatNm, X - HalfW, Y - HalfH);
        DrawnRectV.Add(NmRect); DrawnCats++;
        DrawnPosV.Add(Pos);
    }
    PointNmRectV.AddV(DrawnRectV);
}
Exemple #8
0
/// Command-line driver for CESNA community detection.
///
/// Loads a graph (binary when the input name contains ".ungraph", otherwise a
/// string edge list), optional node labels, and node attributes; fits the
/// model; then writes "<prefix>cmtyvv.txt" and "<prefix>weights.txt".
/// Exceptions raised between Try and Catch are handled by the Catch macro;
/// the function always returns 0.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("cesna. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "./1912.edges", "Input edgelist file name");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) ");
  const TStr AttrFNm = Env.GetIfArgPrefixStr("-a:", "./1912.nodefeat", "Input node attribute file name");
  const TStr ANameFNm = Env.GetIfArgPrefixStr("-n:", "./1912.nodefeatnames", "Input file name for node attribute names");
  int OptComs = Env.GetIfArgPrefixInt("-c:", 10, "The number of communities to detect (-1: detect automatically)");
  const int MinComs = Env.GetIfArgPrefixInt("-mc:", 3, "Minimum number of communities to try");
  const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 20, "Maximum number of communities to try");
  const int DivComs = Env.GetIfArgPrefixInt("-nc:", 5, "How many trials for the number of communities");
  const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 4, "Number of threads for parallelization");
  const double AttrWeight = Env.GetIfArgPrefixFlt("-aw:", 0.5, "We maximize (1 - aw) P(Network) + aw * P(Attributes)");
  const double LassoWeight = Env.GetIfArgPrefixFlt("-lw:", 1.0, "Weight for l-1 regularization on learning the logistic model parameters");
  const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.05, "Alpha for backtracking line search");
  const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search");
  const double MinFeatFrac = Env.GetIfArgPrefixFlt("-mf:", 0.0, "If the fraction of nodes with positive values for an attribute is smaller than this, we ignore that attribute");

#ifndef NOMP
  omp_set_num_threads(NumThreads);
#endif
  PUNGraph G;
  TIntStrH NIDNameH;
  TStrHash<TInt> NodeNameH;
  TVec<TFltV> Wck;
  TVec<TIntV> EstCmtyVV;
  if (InFNm.IsStrIn(".ungraph")) {
    TFIn GFIn(InFNm);
    G = TUNGraph::Load(GFIn);
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NodeNameH);
    // map each position in the string table to its node name
    NIDNameH.Gen(NodeNameH.Len());
    for (int s = 0; s < NodeNameH.Len(); s++) { NIDNameH.AddDat(s, NodeNameH.GetKey(s)); }

  }
  if (LabelFNm.Len() > 0) {
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      // both the id (field 0) and the label (field 1) are read, so require two fields
      if (Ss.Len() > 1) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());

  //load attribute
  TIntV NIDV;
  G->GetNIdV(NIDV);
  THash<TInt, TIntV> RawNIDAttrH, NIDAttrH;
  TIntStrH RawFeatNameH, FeatNameH;
  if (ANameFNm.Len() > 0) {
    TSsParser Ss(ANameFNm, ssfTabSep);
    while (Ss.Next()) {
      // field 1 is read as the attribute name, so require two fields
      if (Ss.Len() > 1) { RawFeatNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }

  TCesnaUtil::LoadNIDAttrHFromNIDKH(NIDV, AttrFNm, RawNIDAttrH, NodeNameH);
  TCesnaUtil::FilterLowEntropy(RawNIDAttrH, NIDAttrH, RawFeatNameH, FeatNameH, MinFeatFrac);

  TExeTm RunTm;
  TCesna CS(G, NIDAttrH, 10, 10);
  
  if (OptComs == -1) {
    printf("finding number of communities\n");
    OptComs = CS.FindComs(NumThreads, MaxComs, MinComs, DivComs, "", false, 0.1, StepAlpha, StepBeta);
  }

  CS.NeighborComInit(OptComs);
  CS.SetWeightAttr(AttrWeight);
  CS.SetLassoCoef(LassoWeight);
  // the parallel optimizer is used only with several threads and at least 1000 edges
  if (NumThreads == 1 || G->GetEdges() < 1000) {
    CS.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta);
  } else {
    CS.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta);
  }
  CS.GetCmtyVV(EstCmtyVV, Wck);
  TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH);

  // write one tab-separated row of weights per entry of Wck
  const TStr WeightsFNm = OutFPrx + "weights.txt";
  FILE* F = fopen(WeightsFNm.CStr(), "wt");
  if (F == NULL) {
    // fopen failed: report it instead of passing a null stream to fprintf
    fprintf(stderr, "Cannot open '%s' for writing\n", WeightsFNm.CStr());
  } else {
    // header with attribute names, only when it lines up with the weight columns;
    // Wck[0] may be read only if Wck is not empty
    if (Wck.Len() > 0 && FeatNameH.Len() == Wck[0].Len()) {
      fprintf(F, "#");
      for (int k = 0; k < FeatNameH.Len(); k++) {
        fprintf(F, "%s", FeatNameH[k].CStr());
        if (k < FeatNameH.Len() - 1) { fprintf(F, "\t"); }
      }
      fprintf(F, "\n");
    }
    for (int c = 0; c < Wck.Len(); c++) {
      for (int k = 0; k < Wck[c].Len(); k++) {
        fprintf(F, "%f", Wck[c][k].Val);
        if (k < Wck[c].Len() - 1) { fprintf(F, "\t"); }
      }
      fprintf(F, "\n");
    }
    fclose(F);
  }

  Catch

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  return 0;
}
Exemple #9
0
// Test node, edge attribute functionality:
//  - per-attribute ("vertical") iterators over int, flt and str attributes,
//  - per-node / per-edge attribute name and value listing after deletions,
//  - attribute values surviving a Save/Load round trip.
// The graph is written to and read back from the file named by FName.
TEST(TNEANet, ManipulateNodesEdgeAttributes) {
  int NNodes = 1000;
  int NEdges = 1000;
  const char *FName = "demo.graph.dat";

  PNEANet Graph;
  PNEANet Graph1;
  int i;
  int x, y;

  Graph = TNEANet::New();
  // a freshly created network has no nodes
  EXPECT_TRUE(Graph->Empty());

  // create the nodes
  for (i = NNodes - 1; i >= 0; i--) {
    Graph->AddNode(i);
  }

  EXPECT_EQ(NNodes, Graph->GetNodes());

  // create the edges with ids 0..NEdges-1 between random endpoints
  for (i = NEdges - 1; i >= 0; i--) {
    x = (long) (drand48() * NNodes);
    y = (long) (drand48() * NNodes);
    Graph->AddEdge(x, y, i);
  }

  // create attributes and fill all nodes
  TStr attr1 = "str";
  TStr attr2 = "int";
  TStr attr3 = "float";
  TStr attr4 = "default";

  // Test vertical int iterator for node 3, 50, 700, 900
  // Check if we can set defaults to 0 for Int data.
  Graph->AddIntAttrN(attr2, 0);
  Graph->AddIntAttrDatN(3, 3*2, attr2);
  Graph->AddIntAttrDatN(50, 50*2, attr2);
  Graph->AddIntAttrDatN(700, 700*2, attr2);
  Graph->AddIntAttrDatN(900, 900*2, attr2);

  EXPECT_EQ(3*2, Graph->GetNAIntI(attr2, 3).GetDat());
  EXPECT_EQ(50*2, Graph->GetNAIntI(attr2, 50).GetDat());

  int NodeId = 0;
  int DefNodes = 0;
  TVec<TInt> TAIntIV = TVec<TInt>();
  for (TNEANet::TAIntI NI = Graph->BegNAIntI(attr2);
    NI < Graph->EndNAIntI(attr2); NI++) {
    if (NI.GetDat()() != 0) {
      TAIntIV.Add(NI.GetDat());
      NodeId++;
    } else {
      DefNodes++;
    }
  }
  
  EXPECT_EQ(4, NodeId);
  EXPECT_EQ(NNodes - 4, DefNodes);
  TAIntIV.Sort();
  EXPECT_EQ(3*2, TAIntIV[0]);
  EXPECT_EQ(50*2, TAIntIV[1]);
  EXPECT_EQ(700*2, TAIntIV[2]);
  EXPECT_EQ(900*2, TAIntIV[3]);

  // Test vertical flt iterator for node 5, 50, 300, 653
  Graph->AddFltAttrDatN(5, 3.41, attr3);
  Graph->AddFltAttrDatN(50, 2.718, attr3);
  Graph->AddFltAttrDatN(300, 150.0, attr3);
  Graph->AddFltAttrDatN(653, 563, attr3);

  EXPECT_EQ(3.41, Graph->GetNAFltI(attr3, 5).GetDat());
  EXPECT_EQ(2.718, Graph->GetNAFltI(attr3, 50).GetDat());

  NodeId = 0;
  DefNodes = 0;
  TVec<TFlt> TAFltIV = TVec<TFlt>();

  // no explicit default was set for attr3, so unset nodes compare equal to TFlt::Mn
  for (TNEANet::TAFltI NI = Graph->BegNAFltI(attr3);
    NI < Graph->EndNAFltI(attr3); NI++) {
    if (NI.GetDat() != TFlt::Mn) {
      NodeId++;
      TAFltIV.Add(NI.GetDat());
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(4, NodeId);
  EXPECT_EQ(NNodes - 4, DefNodes);
  TAFltIV.Sort();
  EXPECT_EQ(2.718, TAFltIV[0]);
  EXPECT_EQ(3.41, TAFltIV[1]);
  EXPECT_EQ(150.0, TAFltIV[2]);
  EXPECT_EQ(563.0, TAFltIV[3]);

  // Test vertical str iterator for node 10, 20, 400 (and 455 with "")
  Graph->AddStrAttrDatN(10, "abc", attr1);
  Graph->AddStrAttrDatN(20, "def", attr1);
  Graph->AddStrAttrDatN(400, "ghi", attr1);
  // this does not show since ""=null
  Graph->AddStrAttrDatN(455, "", attr1);

  EXPECT_EQ('c', Graph->GetNAStrI(attr1, 10).GetDat().LastCh());
  EXPECT_EQ('f', Graph->GetNAStrI(attr1, 20).GetDat().LastCh());

  NodeId = 0;
  DefNodes = 0;
  TVec<TStr> TAStrIV = TVec<TStr>();

  for (TNEANet::TAStrI NI = Graph->BegNAStrI(attr1);
    NI < Graph->EndNAStrI(attr1); NI++) {
    if (NI.GetDat() != TStr::GetNullStr()) {
      NodeId++;
      TAStrIV.Add(NI.GetDat());
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(3, NodeId);
  EXPECT_EQ(NNodes - 3, DefNodes);
  TAStrIV.Sort();
  // TODO(nkhadke): Fix hack to compare strings properly. This works for now.
  EXPECT_EQ('c', TAStrIV[0].LastCh());
  EXPECT_EQ('f', TAStrIV[1].LastCh());
  EXPECT_EQ('i', TAStrIV[2].LastCh());
  
    
  // Test vertical iterator over many types (must skip default/deleted attr) 
  int NId = 55;
  Graph->AddStrAttrDatN(NId, "aaa", attr1);
  Graph->AddIntAttrDatN(NId, 3*2, attr2);
  Graph->AddFltAttrDatN(NId, 3.41, attr3);
  Graph->AddStrAttrDatN(80, "dont appear", attr4); // should not show up
  TStrV NIdAttrName;
  Graph->AttrNameNI(NId, NIdAttrName);
  int AttrLen = NIdAttrName.Len();
  NodeId = 0;
  DefNodes = 0;
  EXPECT_EQ(3, AttrLen);
  
  // after deleting the int value of NId, "int" must not be listed for it
  Graph->DelAttrDatN(NId, attr2);
  Graph->AttrNameNI(NId, NIdAttrName);
  AttrLen = NIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("int") == NIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }
  EXPECT_EQ(2, AttrLen);

  // after deleting the whole "str" attribute, "str" must not be listed
  Graph->AddIntAttrDatN(NId, 3*2, attr2);
  Graph->DelAttrN(attr1);
  Graph->AttrNameNI(NId, NIdAttrName);
  AttrLen = NIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == NIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }
  EXPECT_EQ(2, AttrLen);
   
  TStrV NIdAttrValue;
  Graph->AttrValueNI(NId, NIdAttrValue);
  AttrLen = NIdAttrValue.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == NIdAttrValue[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  } 

  // give every node an int value, then check the sum after a Save/Load round trip
  int expectedTotal = 0;
  for (i = 0; i <NNodes; i++) {
    Graph->AddIntAttrDatN(i, NNodes+i, attr2);
    EXPECT_EQ(NNodes+i, Graph->GetIntAttrDatN(i, attr2));
    expectedTotal += NNodes+i;
  }

  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    Graph1 = TNEANet::Load(FIn);
  }

  int total = 0;
  for (TNEANet::TAIntI NI = Graph1->BegNAIntI(attr2);
    NI < Graph1->EndNAIntI(attr2); NI++) {
    total += NI.GetDat();
  }

  ASSERT_EQ(expectedTotal, total);

  Graph1->Clr();

  // Test vertical int iterator for edge 3, 55, 705, 905
  Graph->AddIntAttrDatE(3, 3*2, attr2);
  Graph->AddIntAttrDatE(55, 55*2, attr2);
  Graph->AddIntAttrDatE(705, 705*2, attr2);
  Graph->AddIntAttrDatE(905, 905*2, attr2);

  EXPECT_EQ(3*2, Graph->GetEAIntI(attr2, 3).GetDat());
  EXPECT_EQ(55*2, Graph->GetEAIntI(attr2, 55).GetDat());

  int EdgeId = 0;
  int DefEdges = 0;
  TAIntIV.Clr();
  // no explicit default was set for the edge int attribute, so unset edges compare equal to TInt::Mn
  for (TNEANet::TAIntI EI = Graph->BegEAIntI(attr2);
    EI < Graph->EndEAIntI(attr2); EI++) {
    if (EI.GetDat() != TInt::Mn) {
      TAIntIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  } 

  EXPECT_EQ(4, EdgeId);
  EXPECT_EQ(NEdges - 4, DefEdges);
  TAIntIV.Sort();
  EXPECT_EQ(3*2, TAIntIV[0]);
  EXPECT_EQ(55*2, TAIntIV[1]);
  EXPECT_EQ(705*2, TAIntIV[2]);
  EXPECT_EQ(905*2, TAIntIV[3]);
   
  // Test vertical flt iterator for edge 5, 50, 300, 653
  Graph->AddFltAttrE(attr3, 0.00);
  Graph->AddFltAttrDatE(5, 4.41, attr3);
  Graph->AddFltAttrDatE(50, 3.718, attr3);
  Graph->AddFltAttrDatE(300, 151.0, attr3);
  Graph->AddFltAttrDatE(653, 654, attr3);

  EXPECT_EQ(4.41, Graph->GetEAFltI(attr3, 5).GetDat());
  EXPECT_EQ(3.718, Graph->GetEAFltI(attr3, 50).GetDat());

  EdgeId = 0;
  DefEdges = 0;
  TAFltIV.Clr();

  for (TNEANet::TAFltI EI = Graph->BegEAFltI(attr3);
    EI < Graph->EndEAFltI(attr3); EI++) {
    // Check if defaults are set to 0.
    if (EI.GetDat() != 0.00) {
      TAFltIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(4, EdgeId);
  EXPECT_EQ(NEdges - 4, DefEdges);
  TAFltIV.Sort();
  EXPECT_EQ(3.718, TAFltIV[0]);
  EXPECT_EQ(4.41, TAFltIV[1]);
  EXPECT_EQ(151.0, TAFltIV[2]);
  EXPECT_EQ(654.0, TAFltIV[3]);

  // Test vertical str iterator for edge 10, 20, 400 (and 455 with "")
  Graph->AddStrAttrDatE(10, "abc", attr1);
  Graph->AddStrAttrDatE(20, "def", attr1);
  Graph->AddStrAttrDatE(400, "ghi", attr1);
  // this does not show since ""=null
  Graph->AddStrAttrDatE(455, "", attr1);

  EXPECT_EQ('c', Graph->GetEAStrI(attr1, 10).GetDat().LastCh());
  EXPECT_EQ('f', Graph->GetEAStrI(attr1, 20).GetDat().LastCh());

  EdgeId = 0;
  DefEdges = 0;
  TAStrIV.Clr();

  for (TNEANet::TAStrI EI = Graph->BegEAStrI(attr1);
    EI < Graph->EndEAStrI(attr1); EI++) {
    if (EI.GetDat() != TStr::GetNullStr()) {
      TAStrIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  } 

  EXPECT_EQ(3, EdgeId);
  EXPECT_EQ(NEdges - 3, DefEdges);
  TAStrIV.Sort();
  // TODO(nkhadke): Fix hack to compare strings properly. This works for now.
  EXPECT_EQ('c', TAStrIV[0].LastCh());
  EXPECT_EQ('f', TAStrIV[1].LastCh());
  EXPECT_EQ('i', TAStrIV[2].LastCh());

  // Test vertical iterator over many types (must skip default/deleted attr) 
  int EId = 55;
  Graph->AddStrAttrDatE(EId, "aaa", attr1);
  Graph->AddIntAttrDatE(EId, 3*2, attr2);
  Graph->AddFltAttrDatE(EId, 3.41, attr3);
  Graph->AddStrAttrDatE(80, "dont appear", attr4); // should not show up  

  TStrV EIdAttrName;
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  EXPECT_EQ(3, AttrLen);
  
  // after deleting the int value of EId, "int" must not be listed for it
  Graph->DelAttrDatE(EId, attr2);
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("int") == EIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // after deleting the whole "str" attribute, the NAME "str" must not be
  // listed (this loop used to compare names against the value "aaa", which
  // can never match an attribute name, so the check was vacuous)
  Graph->AddIntAttrDatE(EId, 3*2, attr2);
  Graph->DelAttrE(attr1);
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == EIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  TStrV EIdAttrValue;
  Graph->AttrValueEI(EId, EIdAttrValue);
  AttrLen = EIdAttrValue.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == EIdAttrValue[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // give every edge an int value, then check the sum after a Save/Load round trip
  expectedTotal = 0;
  for (i = 0; i <NEdges; i++) {
    Graph->AddIntAttrDatE(i, NEdges+i, attr2);
    EXPECT_EQ(NEdges+i, Graph->GetIntAttrDatE(i, attr2));
    expectedTotal += NEdges+i;
  }

  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
    Graph->Clr();
  }

  {
    TFIn FIn(FName);
    Graph1 = TNEANet::Load(FIn);
  }

  // Sum the EDGE attribute values. The node iterators used here before only
  // produced the expected total because NNodes == NEdges and the node values
  // had been set with the same formula.
  total = 0;
  for (TNEANet::TAIntI EI = Graph1->BegEAIntI(attr2);
    EI < Graph1->EndEAIntI(attr2); EI++) {
    total += EI.GetDat();
  }

  EXPECT_EQ(expectedTotal, total);

  //Graph1->Dump();
  Graph1->Clr();
}
/// Paints the points of the current frame that lie inside the zoom rectangle.
/// Painting happens in three passes so that later passes are drawn on top:
/// ordinary points first, then the points listed in SelPointV, and finally the
/// point NearPointN. A point is painted either with its name (PaintPointNm) or
/// as a cross (PaintPointCross); every value returned by PaintPointNm is
/// appended to PointNmRectV.
void TVizMapContext::PaintPoints(PGks Gks, const int& PointFontSize, 
        const int& PointNmFontScale, const double& PointWgtThreshold, 
        const int& CatId, const bool& ShowMgGlassP,
        TVec<TFltRect>& PointNmRectV) {

    const int PointCount = VizMapFrame->GetPoints();
    TFltRect ZoomRect = GetZoomRect();

    // pass 1: everything except selected points and (unless the magnifying
    // glass is shown) the nearest point, which are handled by later passes
    for (int PointIdx = 0; PointIdx < PointCount; ++PointIdx) {
        PVizMapPoint Pt = VizMapFrame->GetPoint(PointIdx);
        const bool SelectedP = SelPointV.IsInBin(PointIdx);
        const bool DeferredNearP = (NearPointN == PointIdx) && !ShowMgGlassP;
        if (SelectedP || DeferredNearP) { continue; }
        const double WorldX = Pt->GetPointX();
        const double WorldY = Pt->GetPointY();
        if (!ZoomRect.IsXYIn(WorldX, WorldY)) { continue; }
        // map to pixel coordinates
        const int ScreenX = GetScreenCoord(WorldX, ZoomRect.GetMnX(),
            ZoomRect.GetXLen(), Gks->GetWidth());
        const int ScreenY = GetScreenCoord(WorldY, ZoomRect.GetMnY(),
            ZoomRect.GetYLen(), Gks->GetHeight());
        // does the point belong to the requested category?
        const bool InCatP = Pt->IsCatId(CatId);
        // the name is written only for named points above the weight threshold
        const bool ShowNameP = Pt->IsPointNm() &&
            ((Pt->GetWgt() > PointWgtThreshold) || SelectedP);
        if (!ShowNameP) {
            PaintPointCross(Gks, ScreenX, ScreenY, SelectedP, InCatP);
            continue;
        }
        PointNmRectV.Add(PaintPointNm(Gks, Pt, ScreenX, ScreenY,
            PointFontSize, PointNmFontScale, SelectedP, InCatP));
    }

    // pass 2: selected points, always painted with the "selected" flag set
    for (int SelIdx = 0; SelIdx < SelPointV.Len(); ++SelIdx) {
        const int PointIdx = SelPointV[SelIdx];
        if ((NearPointN == PointIdx) && !ShowMgGlassP) { continue; }
        PVizMapPoint Pt = VizMapFrame->GetPoint(PointIdx);
        const double WorldX = Pt->GetPointX();
        const double WorldY = Pt->GetPointY();
        if (!ZoomRect.IsXYIn(WorldX, WorldY)) { continue; }
        const int ScreenX = GetScreenCoord(WorldX, ZoomRect.GetMnX(),
            ZoomRect.GetXLen(), Gks->GetWidth());
        const int ScreenY = GetScreenCoord(WorldY, ZoomRect.GetMnY(),
            ZoomRect.GetYLen(), Gks->GetHeight());
        const bool InCatP = Pt->IsCatId(CatId);
        // no weight threshold here: a named selected point always gets its name
        if (!Pt->IsPointNm()) {
            PaintPointCross(Gks, ScreenX, ScreenY, true, InCatP);
            continue;
        }
        PointNmRectV.Add(PaintPointNm(Gks, Pt, ScreenX, ScreenY,
            PointFontSize, PointNmFontScale, true, InCatP));
    }

    // pass 3: the nearest point, skipped when the magnifying glass is shown
    // or when there is no nearest point (NearPointN is -1)
    if (ShowMgGlassP || (NearPointN == -1)) { return; }
    PVizMapPoint NearPt = VizMapFrame->GetPoint(NearPointN);
    const double NearX = NearPt->GetPointX();
    const double NearY = NearPt->GetPointY();
    if (!ZoomRect.IsXYIn(NearX, NearY)) { return; }
    const int ScreenX = GetScreenCoord(NearX, ZoomRect.GetMnX(),
        ZoomRect.GetXLen(), Gks->GetWidth());
    const int ScreenY = GetScreenCoord(NearY, ZoomRect.GetMnY(),
        ZoomRect.GetYLen(), Gks->GetHeight());
    const bool InCatP = NearPt->IsCatId(CatId);
    // the nearest point may also be a selected one
    const bool SelectedP = SelPointV.IsInBin(NearPointN);
    if (NearPt->IsPointNm()) {
        PointNmRectV.Add(PaintPointNm(Gks, NearPt, ScreenX, ScreenY,
            PointFontSize, PointNmFontScale, SelectedP, InCatP));
    } else {
        PaintPointCross(Gks, ScreenX, ScreenY, SelectedP, InCatP);
    }
}
/// estimate number of communities using AGM
int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
    TRnd Rnd(RndSeed);
    int LambdaIter = 100;
    if (Graph->GetNodes() < 200) {
        LambdaIter = 1;
    }
    if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) {
        LambdaIter = 100;
    }

    //Find coms with large C
    TAGMFit AGMFitM(Graph, InitComs, RndSeed);
    if (PNoCom > 0.0) {
        AGMFitM.SetPNoCom(PNoCom);
    }
    AGMFitM.RunMCMC(MaxIter, LambdaIter, "");

    int TE = Graph->GetEdges();
    TFltV RegV;
    RegV.Add(0.3 * TE);
    for (int r = 0; r < 25; r++) {
        RegV.Add(RegV.Last() * RegGap);
    }
    TFltPrV RegComsV, RegLV, RegBICV;
    TFltV LV, BICV;
    //record likelihood and number of communities with nonzero P_c
    for (int r = 0; r < RegV.Len(); r++) {
        double RegCoef = RegV[r];
        AGMFitM.SetRegCoef(RegCoef);
        AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
        AGMFitM.SetRegCoef(0.0);

        TVec<TIntV> EstCmtyVV;
        AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
        int NumLowQ = EstCmtyVV.Len();
        RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));

        if (EstCmtyVV.Len() > 0) {
            TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
            AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
            double CurL = AFTemp.Likelihood();
            LV.Add(CurL);
            BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
        }
        else {
            break;
        }
    }
    // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
    if (LV.Len() == 0) {
        return 2;
    }
    else if (LV[0] == LV.Last()) {
        return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2);
    }


    //normalize likelihood and BIC to 0~100
    int MaxL = 100;
    {
        TFltV& ValueV = LV;
        TFltPrV& RegValueV = RegLV;
        double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
        for (int l = 0; l < ValueV.Len(); l++) {
            if (ValueV[l] < MinValue) {
                MinValue = ValueV[l];
            }
            if (ValueV[l] > MaxValue) {
                MaxValue = ValueV[l];
            }
        }
        while (ValueV.Len() < RegV.Len()) {
            ValueV.Add(MinValue);
        }
        double RangeVal = MaxValue - MinValue;
        for (int l = 0; l < ValueV.Len(); l++) {
            RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
        }

    }
    {
        TFltV& ValueV = BICV;
        TFltPrV& RegValueV = RegBICV;
        double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
        for (int l = 0; l < ValueV.Len(); l++) {
            if (ValueV[l] < MinValue) {
                MinValue = ValueV[l];
            }
            if (ValueV[l] > MaxValue) {
                MaxValue = ValueV[l];
            }
        }
        while (ValueV.Len() < RegV.Len()) {
            ValueV.Add(MaxValue);
        }
        double RangeVal = MaxValue - MinValue;
        for (int l = 0; l < ValueV.Len(); l++) {
            RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
        }
    }

    //fit logistic regression to normalized likelihood.
    TVec<TFltV> XV(RegLV.Len());
    TFltV YV (RegLV.Len());
    for (int l = 0; l < RegLV.Len(); l++) {
        XV[l] = TFltV::GetV(log(RegLV[l].Val1));
        YV[l] = RegLV[l].Val2 / (double) MaxL;
    }
    TFltPrV LRVScaled, LRV;
    TLogRegFit LRFit;
    PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
    for (int l = 0; l < RegLV.Len(); l++) {
        LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
        LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
    }

    //estimate # communities from fitted logistic regression
    int NumComs = 0, IdxRegDrop = 0;
    double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
    double LeftReg = 0.0, RightReg = 0.0;
    TFltV Theta;
    LRMd->GetTheta(Theta);
    RegDrop = (- Theta[1] - LRThres) / Theta[0];
    if (RegDrop <= XV[0][0]) {
        NumComs = (int) RegComsV[0].Val2;
    }
    else if (RegDrop >= XV.Last()[0]) {
        NumComs = (int) RegComsV.Last().Val2;
    }
    else {  //interpolate for RegDrop
        for (int i = 0; i < XV.Len(); i++) {
            if (XV[i][0] > RegDrop) {
                IdxRegDrop = i;
                break;
            }
        }

        if (IdxRegDrop == 0) {
            printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
            for (int l = 0; l < RegLV.Len(); l++) {
                printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
            }
        }
        IAssert(IdxRegDrop > 0);
        LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
        RightReg = XV[IdxRegDrop][0] - RegDrop;
        NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));

    }
    //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
    printf("Num Coms:%d\n", NumComs);
    if (NumComs < 2) {
        NumComs = 2;
    }

    if (PltFPrx.Len() > 0) {
        TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
        TGnuPlot GPC(PltFPrx + ".l");
        GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
        GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
        GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
        GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
        GPC.SetScale(gpsLog10X);
        GPC.SetTitle(PlotTitle);
        GPC.SavePng(PltFPrx + ".l.png");
    }

    return NumComs;
}
/// save graph into a gexf file which Gephi can read
void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) {
    THash<TInt,TIntV> NIDComVHAtr;
    TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr);

    FILE* F = fopen(OutFNm.CStr(), "wt");
    fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
    fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
    fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n");
    if (CmtyVVAtr.Len() > 0) {
        fprintf(F, "\t<attributes class='node'>\n");
        for (int c = 0; c < CmtyVVAtr.Len(); c++) {
            fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c);
            fprintf(F, "\t\t<default>false</default>\n");
            fprintf(F, "\t\t</attribute>\n");
        }
        fprintf(F, "\t</attributes>\n");
    }
    fprintf(F, "\t\t<nodes>\n");
    for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
        int NID = NI.GetId();
        TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
        Label.ChangeChAll('<', ' ');
        Label.ChangeChAll('>', ' ');
        Label.ChangeChAll('&', ' ');
        Label.ChangeChAll('\'', ' ');

        TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);

        double Size = MinSz;
        double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len();
        if (NIDComVHAtr.IsKey(NID)) {
            Size = MinSz +  SizeStep *  (double) NIDComVHAtr.GetDat(NID).Len();
        }
        double Alpha = 1.0;
        fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
        //specify attributes
        if (NIDComVHAtr.IsKey(NID)) {
            fprintf(F, "\t\t\t\t<attvalues>\n");
            for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) {
                int CID = NIDComVHAtr.GetDat(NID)[c];
                fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID);
            }
            fprintf(F, "\t\t\t\t</attvalues>\n");
        }

        fprintf(F, "\t\t\t</node>\n");
    }
    fprintf(F, "\t\t</nodes>\n");
    //plot edges
    int EID = 0;
    fprintf(F, "\t\t<edges>\n");
    for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
        for (int e = 0; e < NI.GetOutDeg(); e++) {
            if (NI.GetId() > NI.GetOutNId(e)) {
                continue;
            }
            fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e));
        }
    }
    fprintf(F, "\t\t</edges>\n");
    fprintf(F, "\t</graph>\n");
    fprintf(F, "</gexf>\n");
    fclose(F);
}
/// Batch version of GetCfy: OutV is re-created with X.Len() entries and
/// OutV[r] receives GetCfy(X[r], NewTheta) for every row r of X.
void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
    const int Rows = X.Len();
    OutV.Gen(Rows);
    int RowN = 0;
    while (RowN < Rows) {
        OutV[RowN] = GetCfy(X[RowN], NewTheta);
        ++RowN;
    }
}
Exemple #14
0
/// Merges JoinV into MainV. Both vectors are walked front to back in parallel:
/// the smaller of the two current items is copied to the result, and two items
/// that compare neither smaller nor greater are combined into one item that
/// keeps the key of the MainV item and the sum of both Dat values. MainV is
/// replaced by the result at the end.
/// NOTE(review): the walk only yields an ordered union if both inputs are
/// already sorted by the item ordering - confirm against the callers.
void TIndex::TQmGixSumMerger<TQmGixItem>::Union(TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV) const {
    TVec<TQmGixItem> MergedV;
    int MainN = 0;
    int JoinN = 0;
    // parallel walk while both vectors still have items
    while ((MainN < MainV.Len()) && (JoinN < JoinV.Len())) {
        const TQmGixItem& MainItem = MainV.GetVal(MainN);
        const TQmGixItem& JoinItem = JoinV.GetVal(JoinN);
        if (MainItem < JoinItem) {
            MergedV.Add(MainItem);
            ++MainN;
            continue;
        }
        if (MainItem > JoinItem) {
            MergedV.Add(JoinItem);
            ++JoinN;
            continue;
        }
        // same position in the ordering: combine the two items
        MergedV.Add(TQmGixItem(MainItem.Key, MainItem.Dat + JoinItem.Dat));
        ++MainN;
        ++JoinN;
    }
    // at most one of the two vectors has items left; copy them unchanged
    while (MainN < MainV.Len()) {
        MergedV.Add(MainV.GetVal(MainN));
        ++MainN;
    }
    while (JoinN < JoinV.Len()) {
        MergedV.Add(JoinV.GetVal(JoinN));
        ++JoinN;
    }
    MainV = MergedV;
}
Exemple #15
0
/// Difference operation of the merger: delegates to MainV.Diff(JoinV, ResV),
/// which writes its result into ResV; MainV and JoinV are taken as const.
/// NOTE(review): presumably ResV ends up holding the items of MainV that are
/// not in JoinV, and Diff may expect sorted inputs - confirm against TVec::Diff.
void TIndex::TQmGixSumMerger<TQmGixItem>::Minus(const TVec<TQmGixItem>& MainV,
        const TVec<TQmGixItem>& JoinV, TVec<TQmGixItem>& ResV) const {

    MainV.Diff(JoinV, ResV);
}