int main(int argc, char *argv[]) { // #### SETUP: Parse Arguments LogOutput Log; THash<TStr, TStr> Arguments; ArgumentParser::ParseArguments(argc, argv, Arguments, Log); TStr OutputDirectory; TStr StartString = ArgumentParser::GetArgument(Arguments, "start", "2009-02-01"); TStr QBDBDirectory = ArgumentParser::GetArgument(Arguments, "qbdb", QBDB_DIR_DEFAULT); TStr OutDirectory = ArgumentParser::GetArgument(Arguments, "out", "/lfs/1/tmp/curis/"); TInt WindowSize = ArgumentParser::GetArgument(Arguments, "window", "14").GetInt(); if (ArgumentParser::GetArgument(Arguments, "nolog", "") == "") { Log.DisableLogging(); } else if (!Arguments.IsKeyGetDat("directory", OutputDirectory)) { Log.SetupNewOutputDirectory(""); } else { Log.SetDirectory(OutputDirectory); } // #### DATA LOADING: Load ALL the things! TQuoteBase QB; TDocBase DB; fprintf(stderr, "Loading QB and DB from file for %d days, starting from %s...\n", WindowSize.Val, StartString.CStr()); Err("%s\n", QBDBDirectory.CStr()); TSecTm PresentTime = TDataLoader::LoadQBDBByWindow(QBDBDirectory, StartString, WindowSize, QB, DB); fprintf(stderr, "QBDB successfully loaded!\n"); TVec<TSecTm> PubTmV; TVec<TStr> PostUrlV; TVec<TStr> QuoteV; fprintf(stderr, "Dumping quotes to file...\n"); TIntV QuoteIds; QB.GetAllQuoteIds(QuoteIds); for (int i = 0; i < QuoteIds.Len(); i++) { TQuote Q; QB.GetQuote(QuoteIds[i], Q); TStr QContentString; Q.GetContentString(QContentString); TVec<TUInt> Sources; Q.GetSources(Sources); for (int j = 0; j < Sources.Len(); j++) { TDoc D; DB.GetDoc(Sources[j], D); TStr PostUrl; D.GetUrl(PostUrl); TSecTm PostTime = D.GetDate(); QuoteV.Add(QContentString); PubTmV.Add(PostTime); PostUrlV.Add(PostUrl); } } TFOut FOut(OutDirectory + "QuoteList" + ".bin"); PubTmV.Save(FOut); PostUrlV.Save(FOut); QuoteV.Save(FOut); fprintf(stderr, "Done!\n"); return 0; }
TQmParam(const TStr& FNm) { EAssertR(TFile::Exists(FNm), "Missing configuration file " + FNm); // load configuration file PJsonVal ConfigVal = TJsonVal::GetValFromSIn(TFIn::New(FNm)); EAssertR(ConfigVal->IsObj(), "Invalid setting file - not valid JSON"); // parse out common stuff RootFPath = TStr::GetNrFPath(ConfigVal->GetObjStr("directory", TDir::GetCurDir())); LockFNm = RootFPath + "./lock"; DbFPath = ConfigVal->GetObjStr("database", "./db/"); PortN = TFlt::Round(ConfigVal->GetObjNum("port")); // parse out unicode definition file TStr UnicodeFNm = ConfigVal->GetObjStr("unicode", TQm::TEnv::QMinerFPath + "./UnicodeDef.Bin"); if (!TUnicodeDef::IsDef()) { TUnicodeDef::Load(UnicodeFNm); } // parse cache if (ConfigVal->IsObjKey("cache")) { PJsonVal CacheVal = ConfigVal->GetObjKey("cache"); // parse out index and default store cache sizes IndexCacheSize = int64(CacheVal->GetObjNum("index", 1024)) * int64(TInt::Mega); StoreCacheSize = int64(CacheVal->GetObjNum("store", 1024)) * int64(TInt::Mega); // prase out store specific sizes, when available if (CacheVal->IsObjKey("stores")) { PJsonVal StoreCacheVals = CacheVal->GetObjKey("stores"); for (int StoreN = 0; StoreN < StoreCacheVals->GetArrVals(); StoreN++) { PJsonVal StoreCacheVal = StoreCacheVals->GetArrVal(StoreN); TStr StoreName = StoreCacheVal->GetObjStr("name"); uint64 StoreCacheSize = int64(StoreCacheVal->GetObjNum("size")) * int64(TInt::Mega); StoreCacheSizes.AddDat(StoreName, StoreCacheSize); } } } else { // default sizes are set to 1GB for index and stores IndexCacheSize = int64(1024) * int64(TInt::Mega); StoreCacheSize = int64(1024) * int64(TInt::Mega); } // load scripts if (ConfigVal->IsObjKey("script")) { // we have configuration file, read it PJsonVal JsVals = ConfigVal->GetObjKey("script"); if (JsVals->IsArr()) { for (int JsValN = 0; JsValN < JsVals->GetArrVals(); JsValN++) { JsParamV.Add(TJsParam(RootFPath, JsVals->GetArrVal(JsValN))); } } else { JsParamV.Add(TJsParam(RootFPath, JsVals)); } } else { // no 
settings for scripts, assume default setting TStr SrcFPath = TStr::GetNrAbsFPath("src", RootFPath); TFFile File(SrcFPath, ".js", false); TStr SrcFNm; while (File.Next(SrcFNm)) { JsParamV.Add(TJsParam(RootFPath, SrcFNm)); } } // load serving folders //TODO: Add to qm config ability to edit this if (ConfigVal->IsObjKey("wwwroot")) { PJsonVal WwwVals = ConfigVal->GetObjKey("wwwroot"); if (WwwVals->IsArr()) { for (int WwwValN = 0; WwwValN < WwwVals->GetArrVals(); WwwValN++) { AddWwwRoot(WwwVals->GetArrVal(WwwValN)); } } else { AddWwwRoot(WwwVals); } } // check for folder with admin GUI TStr GuiFPath = TStr::GetNrAbsFPath("gui", TQm::TEnv::QMinerFPath); if (TDir::Exists(GuiFPath)) { WwwRootV.Add(TStrPr("admin", GuiFPath)); } }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // get command line parameters Env.PrepArgs("Bag-Of-Words To Bag-Of-Word-Weights using precalculated weights"); TStr InBowFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-BagOfWords-FileName"); TStr OutBowwFNm=Env.GetIfArgPrefixStr("-o:", "", "Output-BagOfWordWeights-FileName"); TStr Type=Env.GetIfArgPrefixStr("-type:", "", "Method-Type (load, svm)"); TStr InWgtFNm=Env.GetIfArgPrefixStr("-iwgt:", "", "Input-Matlab-WordWeights-FileName"); double SvmC=Env.GetIfArgPrefixFlt("-svmcost:", 1.0, "Svm-Cost-Parameter"); int SvmTime=Env.GetIfArgPrefixInt("-svmtime:", 60, "Max-Time-per-Model (in seconds)"); bool PutUntiNorm=Env.GetIfArgPrefixBool("-unitnorm:", false, "Normalize-Document-Vectors"); double CutWordWgtSumPrc=Env.GetIfArgPrefixFlt("-cutww:", 0.0, "Cut-Word-Weight-Sum-Percentage"); int MnWordFq=Env.GetIfArgPrefixInt("-mnwfq:", 0, "Minimal-Word-Frequency"); if (Env.IsEndOfRun()){return 0;} // load bow data printf("Loading bag-of-words data from '%s' ...", InBowFNm.CStr()); PBowDocBs BowDocBs=TBowDocBs::LoadBin(InBowFNm); printf(" Done.\n"); PBowDocWgtBs BowDocWgtBs; if (Type == "load") { // loading word weights printf("Loading word-weights data from '%s' ...", InWgtFNm.CStr()); TVec<TFltV> WordWgtVV; Fail; //TLAMisc::LoadTFltVV(InWgtFNm, WordWgtVV); IAssert(WordWgtVV.Len() == 1); printf(" Done.\n"); TFltV& WordWgtV = WordWgtVV[0]; for (int WgtN = 0; WgtN < WordWgtV.Len(); WgtN++) { if (WordWgtV[WgtN] > 0.0) { WordWgtV[WgtN] = sqrt(WordWgtV[WgtN]); } } // calculate boww data printf("Calculating bag-of-word-weights data ..."); BowDocWgtBs = TBowDocWgtBs::NewPreCalcWgt(BowDocBs, WordWgtV, PutUntiNorm, CutWordWgtSumPrc, MnWordFq); printf(" Done.\n"); } else if (Type == "svm") { printf("Calculating bag-of-word-weights data ... 
\n"); PBowDocWgtBs TfidfWgtBs = TBowDocWgtBs::New(BowDocBs, bwwtLogDFNrmTFIDF, CutWordWgtSumPrc, MnWordFq); BowDocWgtBs = TBowDocWgtBs::NewSvmWgt(BowDocBs, TfidfWgtBs, TIntV(), SvmC, SvmTime, false, TIntV(), PutUntiNorm, CutWordWgtSumPrc, MnWordFq); printf("Done.\n"); } else { printf("Wrong method type!\n"); } // save boww data if (!OutBowwFNm.Empty()){ TStr::PutFExtIfEmpty(OutBowwFNm, ".Boww"); printf("Saving bag-of-word-weights data to '%s' ...", OutBowwFNm.CStr()); BowDocWgtBs->SaveBin(OutBowwFNm); BowDocWgtBs->SaveTxtStat(OutBowwFNm + ".txt", BowDocBs, true, true, true); printf(" Done.\n"); } return 0; Catch; return 1; }
void ComputeMissingProperties (const TStr &Dir, const TStr &TriplesFilename) { // Parse the rdf file and create the graph. TFIn File(TriplesFilename); TRDFParser DBpediaDataset(File); printf("Creating graph from input file...\n"); TGraph G; TStrSet NodeStrs; TStrSet PropStrs; bool Parsed = TSnap::GetGraphFromRDFParser(DBpediaDataset, G, NodeStrs, PropStrs); if (!Parsed) { return; } // Store the graph and associated data G.Save(*TFOut::New(Dir + "graph.bin")); NodeStrs.Save(*TFOut::New(Dir + "nodeStrs.bin")); PropStrs.Save(*TFOut::New(Dir + "propStrs.bin")); printf("Computing objects...\n"); // Get the objects of the graph. TIntV Objects; // We defined the objects to be the nodes with prefix http://dbpedia.org/resource/. TObjectFunctor ObjectFunctor(NodeStrs); TObjectUtils::GetObjects(G, ObjectFunctor, Objects); // Store and print the objects. Objects.Save(*TFOut::New(Dir + "objects.bin")); TObjectUtils::PrintObjects(Objects, NodeStrs, *TFOut::New(Dir + "objects.txt")); printf("Computing object matrix...\n"); // Here we choose the descriptors for the objects. // We chose property + nbh (value) descriptors for objects // We could also use more complicated descriptors such as subgraphs or subnetworks. TSparseColMatrix ObjectMatrix1; TSparseColMatrix ObjectMatrix2; TObjectUtils::GetPropertyCount(Objects, G, ObjectMatrix1); TObjectUtils::GetNbhCount(Objects, G, ObjectMatrix2); TLAUtils::NormalizeMatrix(ObjectMatrix1); TLAUtils::NormalizeMatrix(ObjectMatrix2); TSparseColMatrix ObjectMatrix; TLAUtils::ConcatenateMatricesRowWise(ObjectMatrix1, ObjectMatrix2, ObjectMatrix); TLAUtils::NormalizeMatrix(ObjectMatrix); ObjectMatrix.Save(*TFOut::New(Dir + "objectMatrix.bin")); printf("Clustering objects...\n"); // Partition the objects into 64 partitions (clusters). int K = 64; int NumIterations = 20; TIntV Assigments; TVec<TIntV> Clusters; TClusterUtils::GetClusters(ObjectMatrix, K, NumIterations, Assigments, Clusters); // Store the clustering data. 
Assigments.Save(*TFOut::New(Dir + "assigments.bin")); Clusters.Save(*TFOut::New(Dir + "clusters.bin")); // Print some details about the clusters. TClusterUtils::PrintClusterSizes(Clusters, *TFOut::New(Dir + "clusterSizes.txt")); TClusterUtils::PrintClusters(Clusters, Objects, NodeStrs, *TFOut::New(Dir + "clusters.txt")); printf("Computing similarities...\n"); // Compute the similarity betweeen the objects. const int MaxNumSimilarObjects = 100; const int NumThreads = 10; TVec<TIntFltKdV> Similarities; TSimilarityUtils::ComputeSimilarities(ObjectMatrix, Assigments, Clusters, MaxNumSimilarObjects, NumThreads, Similarities); // Store the object similarities. Similarities.Save(*TFOut::New(Dir + "objectSimilarities.bin")); // Print the object similarities. TSimilarityUtils::PrintSimilarities(Similarities, Objects, NodeStrs, 10, *TFOut::New(Dir + "objectSimilarities.txt")); printf("Computing existing property matrix...\n"); // Our goal is to compute the missing out-going properties. // Therefore, we create the matrix of existing out-going properties of the objects. TSparseColMatrix OutPropertyCountMatrix; TObjectUtils::GetOutPropertyCount(Objects, G, OutPropertyCountMatrix); TObjectUtils::PrintPropertyMatrix(OutPropertyCountMatrix, Objects, NodeStrs, PropStrs, *TFOut::New(Dir + "outPropertyCountMatrix.txt")); OutPropertyCountMatrix.Save(*TFOut::New(Dir + "outPropertyCountMatrix.bin")); printf("Computing missing properties...\n"); // And finally, compute the missing properties. int MaxNumMissingProperties = 100; TVec<TIntFltKdV> MissingProperties; TPropertyUtils::GetMissingProperties(Similarities, OutPropertyCountMatrix, MaxNumMissingProperties, NumThreads, MissingProperties); // Store the missing properties data. MissingProperties.Save(*TFOut::New(Dir + "missingProperties.bin")); // Print missing properties. TPropertyUtils::PrintMissingProperties(MissingProperties, Objects, NodeStrs, PropStrs, 10, *TFOut::New(Dir + "missingProperties.txt")); }
/// Enumerate maximal cliques of the network on more than MinMaxCliqueSize nodes void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) { TCliqueOverlap CO; MaxCliques.Clr(false); CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques); }
void LSH::MinHash(TQuoteBase *QB, THashSet<TMd5Sig>& Shingles, TVec<THash<TMd5Sig, TIntSet> >& SignatureBandBuckets) { Err("Creating buckets...\n"); THash < TMd5Sig, TIntV > Signatures; ComputeSignatures(Shingles, Signatures, NumBands * BandSize); // bucket creation for (int i = 0; i < NumBands; ++i) { SignatureBandBuckets.Add(THash<TMd5Sig, TIntSet>()); } // bucket filling int NumShingles = Shingles.Len(); THash<TInt, TQuote> Quotes; QB->GetIdToTQuotes(Quotes); THash<TInt, TQuote>::TIter CurI = Quotes.BegI(); THash<TInt, TQuote>::TIter EndI = Quotes.EndI(); TQuote Q; // SKYFALL for (; CurI < EndI; CurI++) { Q = CurI.GetDat(); TStrV Content; Q.GetParsedContent(Content); TInt Id = Q.GetId(); // signature for quote int ContentLen = Content.Len(); TVec < TIntV > Signature; for (int i = 0; i < ContentLen; i++) { const TMd5Sig ShingleMd5(Content[i]); Signature.Add(Signatures.GetDat(ShingleMd5)); } // place in bucket if (ContentLen < WordWindow) { for (int i = 0; i < NumBands; ++i) { TStr Sig; for (int j = 0; j < BandSize; ++j) { int CurSig = i * BandSize + j; TInt min = NumShingles; for (int k = 0; k < ContentLen; k++) { if (Signature[k][CurSig] < min) { min = Signature[k][CurSig]; } } Sig += min.GetStr() + "-"; } //Err(Sig.CStr()); const TMd5Sig SigMd5(Sig); TIntSet Bucket; SignatureBandBuckets[i].IsKeyGetDat(SigMd5, Bucket); Bucket.AddKey(Id); SignatureBandBuckets[i].AddDat(SigMd5, Bucket); } } else { } } Err("Minhash step complete!\n"); }
void TVizMapContext::PaintCatNms(PGks Gks, const int& KeyWdFontSize, TVec<TFltRect>& PointNmRectV) { // calculate frequency of categories TIntH CatH; TIntFltPrH CatPosH; PBowDocBs BowDocBs = VizMapFrame->GetKeyWdBow(); const int Points = VizMapFrame->GetPoints(); for (int PointN = 0; PointN < Points; PointN++) { PVizMapPoint Point = VizMapFrame->GetPoint(PointN); const int DId = Point->GetDocId(); const int CIds = BowDocBs->GetDocCIds(DId); for (int CIdN = 0; CIdN < CIds; CIdN++) { const int CId = BowDocBs->GetDocCId(DId, CIdN); CatH.AddDat(CId)++; CatPosH.AddDat(CId).Val1 += Point->GetPointX(); CatPosH.AddDat(CId).Val2 += Point->GetPointY(); } } CatH.SortByDat(false); // draw the top cats const int TopCats = Points > 100 ? 6 : 4; TFltRect ZoomRect = GetZoomRect(); Gks->SetFont(TGksFont::New("ARIAL", KeyWdFontSize + 3, ColorCatNmFont)); TVec<TFltRect> CatNmRectV; TVec<TFltV> CatNmPosV; const int MnSize = TInt::GetMn(Gks->GetWidth(), Gks->GetHeight()); const int MnDist = TFlt::Round(0.3 * double(MnSize)); int Cats = 0, CatKeyId = CatH.FFirstKeyId(); while (CatH.FNextKeyId(CatKeyId)) { if (Cats == TopCats) { break; } if (double(CatH[CatKeyId]) / double(Points) < 0.05) { break; } const int CId = CatH.GetKey(CatKeyId); // get name TStr CatNm = BowDocBs->GetCatNm(CId); if (CatFullNmH.IsKey(CatNm)) { CatNm = CatFullNmH.GetDat(CatNm); } else { continue; } // get position TFltPr CatPos = CatPosH.GetDat(CId); const int CatCount = CatH.GetDat(CId); IAssert(CatCount > 0); const double CatX = CatPos.Val1 / double(CatCount); const double CatY = CatPos.Val2 / double(CatCount); // is it within the zoom? 
if (!ZoomRect.IsXYIn(CatX, CatY)) { continue; } // calculate string size on the screen const int HalfTxtWidth = Gks->GetTxtWidth(CatNm) / 2; const int HalfTxtHeight = Gks->GetTxtHeight(CatNm) / 2; // get coordinates in pixels const int X = GetScreenCoord(CatX , ZoomRect.GetMnX(), ZoomRect.GetXLen(), Gks->GetWidth()); const int Y = GetScreenCoord(CatY, ZoomRect.GetMnY(), ZoomRect.GetYLen(), Gks->GetHeight()); // is it to close to any of the most prominent categories int CatNmDist = MnSize; TFltV CatNmPos = TFltV::GetV(double(X), double(Y)); for (int CatNmPosN = 0; CatNmPosN < CatNmPosV.Len(); CatNmPosN++) { const double Dist = TLinAlg::EuclDist(CatNmPosV[CatNmPosN], CatNmPos); CatNmDist = TInt::GetMn(TFlt::Round(Dist), CatNmDist); } if (CatNmDist < MnDist) { continue; } // does it overlap with any of hte most prominent categories TFltRect CatNmRect(X - HalfTxtWidth, Y - HalfTxtHeight, X + HalfTxtWidth, Y + HalfTxtHeight); bool DoDraw = true; const int Rects = CatNmRectV.Len(); for (int RectN = 0; (RectN < Rects) && DoDraw; RectN++) { DoDraw = !TFltRect::Intersection(CatNmRect, CatNmRectV[RectN]); } if (!DoDraw) { continue; } // draw it! Gks->PutTxt(CatNm, X - HalfTxtWidth, Y - HalfTxtHeight); // remember string area CatNmRectV.Add(CatNmRect); Cats++; // remember string position CatNmPosV.Add(CatNmPos); } PointNmRectV.AddV(CatNmRectV); }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("cesna. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix"); const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "./1912.edges", "Input edgelist file name"); const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) "); const TStr AttrFNm = Env.GetIfArgPrefixStr("-a:", "./1912.nodefeat", "Input node attribute file name"); const TStr ANameFNm = Env.GetIfArgPrefixStr("-n:", "./1912.nodefeatnames", "Input file name for node attribute names"); int OptComs = Env.GetIfArgPrefixInt("-c:", 10, "The number of communities to detect (-1: detect automatically)"); const int MinComs = Env.GetIfArgPrefixInt("-mc:", 3, "Minimum number of communities to try"); const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 20, "Maximum number of communities to try"); const int DivComs = Env.GetIfArgPrefixInt("-nc:", 5, "How many trials for the number of communities"); const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 4, "Number of threads for parallelization"); const double AttrWeight = Env.GetIfArgPrefixFlt("-aw:", 0.5, "We maximize (1 - aw) P(Network) + aw * P(Attributes)"); const double LassoWeight = Env.GetIfArgPrefixFlt("-lw:", 1.0, "Weight for l-1 regularization on learning the logistic model parameters"); const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.05, "Alpha for backtracking line search"); const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search"); const double MinFeatFrac = Env.GetIfArgPrefixFlt("-mf:", 0.0, "If the fraction of nodes with positive values for an attribute is smaller than this, we ignore that attribute"); #ifndef NOMP omp_set_num_threads(NumThreads); #endif PUNGraph G; TIntStrH NIDNameH; TStrHash<TInt> NodeNameH; TVec<TFltV> Wck; TVec<TIntV> EstCmtyVV; if 
(InFNm.IsStrIn(".ungraph")) { TFIn GFIn(InFNm); G = TUNGraph::Load(GFIn); } else { G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NodeNameH); NIDNameH.Gen(NodeNameH.Len()); for (int s = 0; s < NodeNameH.Len(); s++) { NIDNameH.AddDat(s, NodeNameH.GetKey(s)); } } if (LabelFNm.Len() > 0) { TSsParser Ss(LabelFNm, ssfTabSep); while (Ss.Next()) { if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); } } } printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges()); //load attribute TIntV NIDV; G->GetNIdV(NIDV); THash<TInt, TIntV> RawNIDAttrH, NIDAttrH; TIntStrH RawFeatNameH, FeatNameH; if (ANameFNm.Len() > 0) { TSsParser Ss(ANameFNm, ssfTabSep); while (Ss.Next()) { if (Ss.Len() > 0) { RawFeatNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); } } } TCesnaUtil::LoadNIDAttrHFromNIDKH(NIDV, AttrFNm, RawNIDAttrH, NodeNameH); TCesnaUtil::FilterLowEntropy(RawNIDAttrH, NIDAttrH, RawFeatNameH, FeatNameH, MinFeatFrac); TExeTm RunTm; TCesna CS(G, NIDAttrH, 10, 10); if (OptComs == -1) { printf("finding number of communities\n"); OptComs = CS.FindComs(NumThreads, MaxComs, MinComs, DivComs, "", false, 0.1, StepAlpha, StepBeta); } CS.NeighborComInit(OptComs); CS.SetWeightAttr(AttrWeight); CS.SetLassoCoef(LassoWeight); if (NumThreads == 1 || G->GetEdges() < 1000) { CS.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta); } else { CS.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta); } CS.GetCmtyVV(EstCmtyVV, Wck); TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH); FILE* F = fopen((OutFPrx + "weights.txt").CStr(), "wt"); if (FeatNameH.Len() == Wck[0].Len()) { fprintf(F, "#"); for (int k = 0; k < FeatNameH.Len(); k++) { fprintf(F, "%s", FeatNameH[k].CStr()); if (k < FeatNameH.Len() - 1) { fprintf(F, "\t"); } } fprintf(F, "\n"); } for (int c = 0; c < Wck.Len(); c++) { for (int k = 0; k < Wck[c].Len(); k++) { fprintf(F, "%f", Wck[c][k].Val); if (k < Wck[c].Len() - 1) { fprintf(F, "\t"); } } fprintf(F, "\n"); } fclose(F); Catch 
printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// Test node, edge attribute functionality.
//
// Builds a network with NNodes nodes and NEdges random edges, then for nodes
// and again for edges: sets int/flt/str attributes on a few ids, walks the
// attribute iterators counting set vs. default entries, checks attribute
// deletion through the per-id name/value listings, and finally checks that
// int attribute values survive a Save/Load round trip.
TEST(TNEANet, ManipulateNodesEdgeAttributes) {
  int NNodes = 1000;
  int NEdges = 1000;
  const char *FName = "demo.graph.dat";

  PNEANet Graph;
  PNEANet Graph1;
  int i;
  int x, y;

  Graph = TNEANet::New();
  // a freshly created network must report itself as empty
  EXPECT_TRUE(Graph->Empty());

  // create the nodes
  for (i = NNodes - 1; i >= 0; i--) {
    Graph->AddNode(i);
  }
  EXPECT_EQ(NNodes, Graph->GetNodes());

  // create the edges
  for (i = NEdges - 1; i >= 0; i--) {
    x = (long) (drand48() * NNodes);
    y = (long) (drand48() * NNodes);
    Graph->AddEdge(x, y, i);
  }

  // create attributes and fill all nodes
  TStr attr1 = "str";
  TStr attr2 = "int";
  TStr attr3 = "float";
  TStr attr4 = "default";

  // Test vertical int iterator for node 3, 50, 700, 900
  // Check if we can set defaults to 0 for Int data.
  Graph->AddIntAttrN(attr2, 0);
  Graph->AddIntAttrDatN(3, 3*2, attr2);
  Graph->AddIntAttrDatN(50, 50*2, attr2);
  Graph->AddIntAttrDatN(700, 700*2, attr2);
  Graph->AddIntAttrDatN(900, 900*2, attr2);

  EXPECT_EQ(3*2, Graph->GetNAIntI(attr2, 3).GetDat());
  EXPECT_EQ(50*2, Graph->GetNAIntI(attr2, 50).GetDat());

  int NodeId = 0;
  int DefNodes = 0;
  TVec<TInt> TAIntIV = TVec<TInt>();
  for (TNEANet::TAIntI NI = Graph->BegNAIntI(attr2);
       NI < Graph->EndNAIntI(attr2); NI++) {
    if (NI.GetDat()() != 0) {
      TAIntIV.Add(NI.GetDat());
      NodeId++;
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(4, NodeId);
  EXPECT_EQ(NNodes - 4, DefNodes);
  TAIntIV.Sort();
  EXPECT_EQ(3*2, TAIntIV[0]);
  EXPECT_EQ(50*2, TAIntIV[1]);
  EXPECT_EQ(700*2, TAIntIV[2]);
  EXPECT_EQ(900*2, TAIntIV[3]);

  // Test vertical flt iterator for node 5, 50, 300, 653
  Graph->AddFltAttrDatN(5, 3.41, attr3);
  Graph->AddFltAttrDatN(50, 2.718, attr3);
  Graph->AddFltAttrDatN(300, 150.0, attr3);
  Graph->AddFltAttrDatN(653, 563, attr3);

  EXPECT_EQ(3.41, Graph->GetNAFltI(attr3, 5).GetDat());
  EXPECT_EQ(2.718, Graph->GetNAFltI(attr3, 50).GetDat());

  NodeId = 0;
  DefNodes = 0;
  TVec<TFlt> TAFltIV = TVec<TFlt>();

  // no explicit default was registered for attr3, so unset entries are
  // compared against TFlt::Mn
  for (TNEANet::TAFltI NI = Graph->BegNAFltI(attr3);
       NI < Graph->EndNAFltI(attr3); NI++) {
    if (NI.GetDat() != TFlt::Mn) {
      NodeId++;
      TAFltIV.Add(NI.GetDat());
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(4, NodeId);
  EXPECT_EQ(NNodes - 4, DefNodes);
  TAFltIV.Sort();
  EXPECT_EQ(2.718, TAFltIV[0]);
  EXPECT_EQ(3.41, TAFltIV[1]);
  EXPECT_EQ(150.0, TAFltIV[2]);
  EXPECT_EQ(563.0, TAFltIV[3]);

  // Test vertical str iterator for node 10, 20, 400
  Graph->AddStrAttrDatN(10, "abc", attr1);
  Graph->AddStrAttrDatN(20, "def", attr1);
  Graph->AddStrAttrDatN(400, "ghi", attr1);
  // this does not show since ""=null
  Graph->AddStrAttrDatN(455, "", attr1);

  EXPECT_EQ('c', Graph->GetNAStrI(attr1, 10).GetDat().LastCh());
  EXPECT_EQ('f', Graph->GetNAStrI(attr1, 20).GetDat().LastCh());

  NodeId = 0;
  DefNodes = 0;
  TVec<TStr> TAStrIV = TVec<TStr>();

  for (TNEANet::TAStrI NI = Graph->BegNAStrI(attr1);
       NI < Graph->EndNAStrI(attr1); NI++) {
    if (NI.GetDat() != TStr::GetNullStr()) {
      NodeId++;
      TAStrIV.Add(NI.GetDat());
    } else {
      DefNodes++;
    }
  }

  EXPECT_EQ(3, NodeId);
  EXPECT_EQ(NNodes - 3, DefNodes);
  TAStrIV.Sort();
  // TODO(nkhadke): Fix hack to compare strings properly. This works for now.
  EXPECT_EQ('c', TAStrIV[0].LastCh());
  EXPECT_EQ('f', TAStrIV[1].LastCh());
  EXPECT_EQ('i', TAStrIV[2].LastCh());

  // Test vertical iterator over many types (must skip default/deleted attr)
  int NId = 55;
  Graph->AddStrAttrDatN(NId, "aaa", attr1);
  Graph->AddIntAttrDatN(NId, 3*2, attr2);
  Graph->AddFltAttrDatN(NId, 3.41, attr3);
  Graph->AddStrAttrDatN(80, "dont appear", attr4); // should not show up

  TStrV NIdAttrName;
  Graph->AttrNameNI(NId, NIdAttrName);
  int AttrLen = NIdAttrName.Len();
  NodeId = 0;
  DefNodes = 0;
  EXPECT_EQ(3, AttrLen);

  // deleting the int value of node NId must remove "int" from its names
  Graph->DelAttrDatN(NId, attr2);
  Graph->AttrNameNI(NId, NIdAttrName);
  AttrLen = NIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("int") == NIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }
  EXPECT_EQ(2, AttrLen);

  // deleting the whole "str" attribute must remove it from the names
  Graph->AddIntAttrDatN(NId, 3*2, attr2);
  Graph->DelAttrN(attr1);
  Graph->AttrNameNI(NId, NIdAttrName);
  AttrLen = NIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == NIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }
  EXPECT_EQ(2, AttrLen);

  // NOTE(review): this scans attribute VALUES for the attribute name "str";
  // the deleted attribute's value on this node was "aaa" - confirm which
  // string is meant.
  TStrV NIdAttrValue;
  Graph->AttrValueNI(NId, NIdAttrValue);
  AttrLen = NIdAttrValue.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == NIdAttrValue[i]()) {
      // FAIL
      EXPECT_EQ(1,2);
    }
  }

  // give every node an int value and check it survives Save/Load
  int expectedTotal = 0;
  for (i = 0; i <NNodes; i++) {
    Graph->AddIntAttrDatN(i, NNodes+i, attr2);
    EXPECT_EQ(NNodes+i, Graph->GetIntAttrDatN(i, attr2));
    expectedTotal += NNodes+i;
  }

  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    Graph1 = TNEANet::Load(FIn);
  }

  int total = 0;
  for (TNEANet::TAIntI NI = Graph1->BegNAIntI(attr2);
       NI < Graph1->EndNAIntI(attr2); NI++) {
    total += NI.GetDat();
  }

  ASSERT_EQ(expectedTotal, total);

  Graph1->Clr();

  // Test vertical int iterator for edge 3, 55, 705, 905
  Graph->AddIntAttrDatE(3, 3*2, attr2);
  Graph->AddIntAttrDatE(55, 55*2, attr2);
  Graph->AddIntAttrDatE(705, 705*2, attr2);
  Graph->AddIntAttrDatE(905, 905*2, attr2);

  EXPECT_EQ(3*2, Graph->GetEAIntI(attr2, 3).GetDat());
  EXPECT_EQ(55*2, Graph->GetEAIntI(attr2, 55).GetDat());

  int EdgeId = 0;
  int DefEdges = 0;
  TAIntIV.Clr();
  // no explicit default was registered for the edge int attribute, so unset
  // entries are compared against TInt::Mn
  for (TNEANet::TAIntI EI = Graph->BegEAIntI(attr2);
       EI < Graph->EndEAIntI(attr2); EI++) {
    if (EI.GetDat() != TInt::Mn) {
      TAIntIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(4, EdgeId);
  EXPECT_EQ(NEdges - 4, DefEdges);
  TAIntIV.Sort();
  EXPECT_EQ(3*2, TAIntIV[0]);
  EXPECT_EQ(55*2, TAIntIV[1]);
  EXPECT_EQ(705*2, TAIntIV[2]);
  EXPECT_EQ(905*2, TAIntIV[3]);

  // Test vertical flt iterator for edge 5, 50, 300, 653
  Graph->AddFltAttrE(attr3, 0.00);
  Graph->AddFltAttrDatE(5, 4.41, attr3);
  Graph->AddFltAttrDatE(50, 3.718, attr3);
  Graph->AddFltAttrDatE(300, 151.0, attr3);
  Graph->AddFltAttrDatE(653, 654, attr3);

  EXPECT_EQ(4.41, Graph->GetEAFltI(attr3, 5).GetDat());
  EXPECT_EQ(3.718, Graph->GetEAFltI(attr3, 50).GetDat());

  EdgeId = 0;
  DefEdges = 0;
  TAFltIV.Clr();

  for (TNEANet::TAFltI EI = Graph->BegEAFltI(attr3);
       EI < Graph->EndEAFltI(attr3); EI++) {
    // Check if defaults are set to 0.
    if (EI.GetDat() != 0.00) {
      TAFltIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(4, EdgeId);
  EXPECT_EQ(NEdges - 4, DefEdges);
  TAFltIV.Sort();
  EXPECT_EQ(3.718, TAFltIV[0]);
  EXPECT_EQ(4.41, TAFltIV[1]);
  EXPECT_EQ(151.0, TAFltIV[2]);
  EXPECT_EQ(654.0, TAFltIV[3]);

  // Test vertical str iterator for edge 10, 20, 400
  Graph->AddStrAttrDatE(10, "abc", attr1);
  Graph->AddStrAttrDatE(20, "def", attr1);
  Graph->AddStrAttrDatE(400, "ghi", attr1);
  // this does not show since ""=null
  Graph->AddStrAttrDatE(455, "", attr1);

  EXPECT_EQ('c', Graph->GetEAStrI(attr1, 10).GetDat().LastCh());
  EXPECT_EQ('f', Graph->GetEAStrI(attr1, 20).GetDat().LastCh());

  EdgeId = 0;
  DefEdges = 0;
  TAStrIV.Clr();

  for (TNEANet::TAStrI EI = Graph->BegEAStrI(attr1);
       EI < Graph->EndEAStrI(attr1); EI++) {
    if (EI.GetDat() != TStr::GetNullStr()) {
      TAStrIV.Add(EI.GetDat());
      EdgeId++;
    } else {
      DefEdges++;
    }
  }

  EXPECT_EQ(3, EdgeId);
  EXPECT_EQ(NEdges - 3, DefEdges);
  TAStrIV.Sort();
  // TODO(nkhadke): Fix hack to compare strings properly. This works for now.
  EXPECT_EQ('c', TAStrIV[0].LastCh());
  EXPECT_EQ('f', TAStrIV[1].LastCh());
  EXPECT_EQ('i', TAStrIV[2].LastCh());

  // Test vertical iterator over many types (must skip default/deleted attr)
  int EId = 55;
  Graph->AddStrAttrDatE(EId, "aaa", attr1);
  Graph->AddIntAttrDatE(EId, 3*2, attr2);
  Graph->AddFltAttrDatE(EId, 3.41, attr3);
  Graph->AddStrAttrDatE(80, "dont appear", attr4); // should not show up

  TStrV EIdAttrName;
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  EXPECT_EQ(3, AttrLen);

  // deleting the int value of edge EId must remove "int" from its names
  Graph->DelAttrDatE(EId, attr2);
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("int") == EIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // deleting the whole "str" attribute must remove its NAME from the list;
  // the check looks for the attribute name, as the node variant above does
  Graph->AddIntAttrDatE(EId, 3*2, attr2);
  Graph->DelAttrE(attr1);
  Graph->AttrNameEI(EId, EIdAttrName);
  AttrLen = EIdAttrName.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == EIdAttrName[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // NOTE(review): this scans attribute VALUES for the attribute name "str";
  // the deleted attribute's value on this edge was "aaa" - confirm which
  // string is meant.
  TStrV EIdAttrValue;
  Graph->AttrValueEI(EId, EIdAttrValue);
  AttrLen = EIdAttrValue.Len();
  for (i = 0; i < AttrLen; i++) {
    if (TStr("str") == EIdAttrValue[i]()) {
      // FAIL
      EXPECT_EQ(2,3);
    }
  }

  // give every edge an int value and check it survives Save/Load
  expectedTotal = 0;
  for (i = 0; i <NEdges; i++) {
    Graph->AddIntAttrDatE(i, NEdges+i, attr2);
    EXPECT_EQ(NEdges+i, Graph->GetIntAttrDatE(i, attr2));
    expectedTotal += NEdges+i;
  }

  {
    TFOut FOut(FName);
    Graph->Save(FOut);
    FOut.Flush();
    Graph->Clr();
  }

  {
    TFIn FIn(FName);
    Graph1 = TNEANet::Load(FIn);
  }

  // Sum the EDGE attribute of the reloaded graph. Iterating the node
  // attribute here would still pass, but only because NNodes == NEdges
  // makes both sums equal.
  total = 0;
  for (TNEANet::TAIntI EI = Graph1->BegEAIntI(attr2);
       EI < Graph1->EndEAIntI(attr2); EI++) {
    total += EI.GetDat();
  }

  EXPECT_EQ(expectedTotal, total);

  //Graph1->Dump();
  Graph1->Clr();
}
void TVizMapContext::PaintPoints(PGks Gks, const int& PointFontSize, const int& PointNmFontScale, const double& PointWgtThreshold, const int& CatId, const bool& ShowMgGlassP, TVec<TFltRect>& PointNmRectV) { int Points = VizMapFrame->GetPoints(); TFltRect ZoomRect = GetZoomRect(); for (int PointN = 0; PointN < Points; PointN++) { PVizMapPoint Point = VizMapFrame->GetPoint(PointN); // we ignore selected and nearest point in the first run bool IsSelPointP = SelPointV.IsInBin(PointN); if (IsSelPointP || ((NearPointN == PointN) && !ShowMgGlassP)) { continue; } const double PointX = Point->GetPointX(), PointY = Point->GetPointY(); if (ZoomRect.IsXYIn(PointX, PointY)) { // get coordinates in pixels const int X = GetScreenCoord(PointX, ZoomRect.GetMnX(), ZoomRect.GetXLen(), Gks->GetWidth()); const int Y = GetScreenCoord(PointY, ZoomRect.GetMnY(), ZoomRect.GetYLen(), Gks->GetHeight()); // check if point has given category bool IsCatP = Point->IsCatId(CatId); // check if the point is under threshold if (Point->IsPointNm() && ((Point->GetWgt() > PointWgtThreshold) || IsSelPointP)) { // write full point name PointNmRectV.Add(PaintPointNm(Gks, Point, X, Y, PointFontSize, PointNmFontScale, IsSelPointP, IsCatP)); } else { // draw a cross PaintPointCross(Gks, X, Y, IsSelPointP, IsCatP); } } } // paint selected points for (int SelPointN = 0; SelPointN < SelPointV.Len(); SelPointN++) { const int PointN = SelPointV[SelPointN]; if ((NearPointN == PointN) && !ShowMgGlassP) { continue; } PVizMapPoint Point = VizMapFrame->GetPoint(PointN); const double PointX = Point->GetPointX(), PointY = Point->GetPointY(); if (ZoomRect.IsXYIn(PointX, PointY)) { // get coordinates in pixels const int X = GetScreenCoord(PointX, ZoomRect.GetMnX(), ZoomRect.GetXLen(), Gks->GetWidth()); const int Y = GetScreenCoord(PointY, ZoomRect.GetMnY(), ZoomRect.GetYLen(), Gks->GetHeight()); // check if point has given category bool IsCatP = Point->IsCatId(CatId); // check if the point is under threshold if 
(Point->IsPointNm()) { // write full point name PointNmRectV.Add(PaintPointNm(Gks, Point, X, Y, PointFontSize, PointNmFontScale, true, IsCatP)); } else { // draw a cross PaintPointCross(Gks, X, Y, true, IsCatP); } } } // paint nearest point if (!ShowMgGlassP && (NearPointN != -1)) { PVizMapPoint Point = VizMapFrame->GetPoint(NearPointN); const double PointX = Point->GetPointX(), PointY = Point->GetPointY(); if (ZoomRect.IsXYIn(PointX, PointY)) { // get coordinates in pixels const int X = GetScreenCoord(PointX, ZoomRect.GetMnX(), ZoomRect.GetXLen(), Gks->GetWidth()); const int Y = GetScreenCoord(PointY, ZoomRect.GetMnY(), ZoomRect.GetYLen(), Gks->GetHeight()); // check if point has given category bool IsCatP = Point->IsCatId(CatId); // check if point is selected bool IsSelPointP = SelPointV.IsInBin(NearPointN); // check if the point is under threshold if (Point->IsPointNm()) { // write full point name PointNmRectV.Add(PaintPointNm(Gks, Point, X, Y, PointFontSize, PointNmFontScale, IsSelPointP, IsCatP)); } else { // draw a cross PaintPointCross(Gks, X, Y, IsSelPointP, IsCatP); } } } }
/// estimate number of communities using AGM int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) { TRnd Rnd(RndSeed); int LambdaIter = 100; if (Graph->GetNodes() < 200) { LambdaIter = 1; } if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; } //Find coms with large C TAGMFit AGMFitM(Graph, InitComs, RndSeed); if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); } AGMFitM.RunMCMC(MaxIter, LambdaIter, ""); int TE = Graph->GetEdges(); TFltV RegV; RegV.Add(0.3 * TE); for (int r = 0; r < 25; r++) { RegV.Add(RegV.Last() * RegGap); } TFltPrV RegComsV, RegLV, RegBICV; TFltV LV, BICV; //record likelihood and number of communities with nonzero P_c for (int r = 0; r < RegV.Len(); r++) { double RegCoef = RegV[r]; AGMFitM.SetRegCoef(RegCoef); AGMFitM.MLEGradAscentGivenCAG(0.01, 1000); AGMFitM.SetRegCoef(0.0); TVec<TIntV> EstCmtyVV; AGMFitM.GetCmtyVV(EstCmtyVV, 0.99); int NumLowQ = EstCmtyVV.Len(); RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ)); if (EstCmtyVV.Len() > 0) { TAGMFit AFTemp(Graph, EstCmtyVV, Rnd); AFTemp.MLEGradAscentGivenCAG(0.001, 1000); double CurL = AFTemp.Likelihood(); LV.Add(CurL); BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0)); } else { break; } } // if likelihood does not exist or does not change at all, report the smallest number of communities or 2 if (LV.Len() == 0) { return 2; } else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); } //normalize likelihood and BIC to 0~100 int MaxL = 100; { TFltV& ValueV = LV; TFltPrV& RegValueV = RegLV; double MinValue = TFlt::Mx, MaxValue = TFlt::Mn; for (int l = 0; l < ValueV.Len(); l++) { if (ValueV[l] < MinValue) { MinValue = ValueV[l]; } if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; } } while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); } double RangeVal = 
MaxValue - MinValue; for (int l = 0; l < ValueV.Len(); l++) { RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal)); } } { TFltV& ValueV = BICV; TFltPrV& RegValueV = RegBICV; double MinValue = TFlt::Mx, MaxValue = TFlt::Mn; for (int l = 0; l < ValueV.Len(); l++) { if (ValueV[l] < MinValue) { MinValue = ValueV[l]; } if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; } } while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); } double RangeVal = MaxValue - MinValue; for (int l = 0; l < ValueV.Len(); l++) { RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal)); } } //fit logistic regression to normalized likelihood. TVec<TFltV> XV(RegLV.Len()); TFltV YV (RegLV.Len()); for (int l = 0; l < RegLV.Len(); l++) { XV[l] = TFltV::GetV(log(RegLV[l].Val1)); YV[l] = RegLV[l].Val2 / (double) MaxL; } TFltPrV LRVScaled, LRV; TLogRegFit LRFit; PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx); for (int l = 0; l < RegLV.Len(); l++) { LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l]))); LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2)); } //estimate # communities from fitted logistic regression int NumComs = 0, IdxRegDrop = 0; double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25 double LeftReg = 0.0, RightReg = 0.0; TFltV Theta; LRMd->GetTheta(Theta); RegDrop = (- Theta[1] - LRThres) / Theta[0]; if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; } else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; } else { //interpolate for RegDrop for (int i = 0; i < XV.Len(); i++) { if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; } } if (IdxRegDrop == 0) { printf("Error!! 
RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val); for (int l = 0; l < RegLV.Len(); l++) { printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val); } } IAssert(IdxRegDrop > 0); LeftReg = RegDrop - XV[IdxRegDrop - 1][0]; RightReg = XV[IdxRegDrop][0] - RegDrop; NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg)); } //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2); printf("Num Coms:%d\n", NumComs); if (NumComs < 2) { NumComs = 2; } if (PltFPrx.Len() > 0) { TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE); TGnuPlot GPC(PltFPrx + ".l"); GPC.AddPlot(RegComsV, gpwLinesPoints, "C"); GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood"); GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC"); GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)"); GPC.SetScale(gpsLog10X); GPC.SetTitle(PlotTitle); GPC.SavePng(PltFPrx + ".l.png"); } return NumComs; }
/// save graph into a gexf file which Gephi can read void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) { THash<TInt,TIntV> NIDComVHAtr; TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr); FILE* F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n"); if (CmtyVVAtr.Len() > 0) { fprintf(F, "\t<attributes class='node'>\n"); for (int c = 0; c < CmtyVVAtr.Len(); c++) { fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c); fprintf(F, "\t\t<default>false</default>\n"); fprintf(F, "\t\t</attribute>\n"); } fprintf(F, "\t</attributes>\n"); } fprintf(F, "\t\t<nodes>\n"); for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { int NID = NI.GetId(); TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; Label.ChangeChAll('<', ' '); Label.ChangeChAll('>', ' '); Label.ChangeChAll('&', ' '); Label.ChangeChAll('\'', ' '); TIntTr Color = NIDColorH.IsKey(NID)? 
NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); double Size = MinSz; double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len(); if (NIDComVHAtr.IsKey(NID)) { Size = MinSz + SizeStep * (double) NIDComVHAtr.GetDat(NID).Len(); } double Alpha = 1.0; fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); //specify attributes if (NIDComVHAtr.IsKey(NID)) { fprintf(F, "\t\t\t\t<attvalues>\n"); for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) { int CID = NIDComVHAtr.GetDat(NID)[c]; fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID); } fprintf(F, "\t\t\t\t</attvalues>\n"); } fprintf(F, "\t\t\t</node>\n"); } fprintf(F, "\t\t</nodes>\n"); //plot edges int EID = 0; fprintf(F, "\t\t<edges>\n"); for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { for (int e = 0; e < NI.GetOutDeg(); e++) { if (NI.GetId() > NI.GetOutNId(e)) { continue; } fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e)); } } fprintf(F, "\t\t</edges>\n"); fprintf(F, "\t</graph>\n"); fprintf(F, "</gexf>\n"); fclose(F); }
void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) { OutV.Gen(X.Len()); for (int r = 0; r < X.Len(); r++) { OutV[r] = GetCfy(X[r], NewTheta); } }
/// Merges JoinV into MainV, summing the Dat of items that compare as equal.
///
/// Both vectors are walked in parallel. Items are ordered with TQmGixItem's
/// operator< and operator>; when neither holds for the two current items, a
/// single item carrying the Key of the MainV item and the sum of both Dat
/// values is emitted. Leftover items of either vector are appended as they are.
/// NOTE(review): this is a merge step, so both inputs are presumably expected
/// to be sorted by that ordering - confirm against the callers.
///
/// @param MainV  first input; overwritten with the merged result
/// @param JoinV  second input; not modified
void TIndex::TQmGixSumMerger<TQmGixItem>::Union(TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV) const {
    TVec<TQmGixItem> MergedV;
    int MainN = 0;
    int JoinN = 0;

    // advance through both vectors while each still has items left
    while ((MainN < MainV.Len()) && (JoinN < JoinV.Len())) {
        const TQmGixItem& MainItem = MainV.GetVal(MainN);
        const TQmGixItem& JoinItem = JoinV.GetVal(JoinN);
        if (MainItem < JoinItem) {
            // item present only in MainV so far
            MergedV.Add(MainItem);
            MainN++;
            continue;
        }
        if (MainItem > JoinItem) {
            // item present only in JoinV so far
            MergedV.Add(JoinItem);
            JoinN++;
            continue;
        }
        // same position in the ordering: keep one item, add up the Dat values
        MergedV.Add(TQmGixItem(MainItem.Key, MainItem.Dat + JoinItem.Dat));
        MainN++;
        JoinN++;
    }

    // at most one of the two tails below is non-empty
    while (MainN < MainV.Len()) {
        MergedV.Add(MainV.GetVal(MainN));
        MainN++;
    }
    while (JoinN < JoinV.Len()) {
        MergedV.Add(JoinV.GetVal(JoinN));
        JoinN++;
    }

    MainV = MergedV;
}
/// Computes MainV minus JoinV into ResV by delegating to TVec::Diff.
///
/// No Dat arithmetic is done here: the result is whatever
/// MainV.Diff(JoinV, ResV) produces.
/// NOTE(review): Diff is presumably the set difference of two sorted vectors
/// (items of MainV that do not occur in JoinV) - confirm against TVec::Diff.
///
/// @param MainV  vector to subtract from; not modified
/// @param JoinV  vector whose items are removed; not modified
/// @param ResV   receives the result
void TIndex::TQmGixSumMerger<TQmGixItem>::Minus(const TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV, TVec<TQmGixItem>& ResV) const { MainV.Diff(JoinV, ResV); }