void TSkyGridEnt::GetSorted_LinkWgtDstEntIdPrV( const uint64& MnTm, const double& TopWgtSumPrc, TIntPrV& LinkWgtDstEntIdPrV) const { double AllLinkWgtSum=0; TIntIntH DstEntIdLinkWgtH; int LinkEnts=GetLinkEnts(); for (int LinkEntN=0; LinkEntN<LinkEnts; LinkEntN++){ int DstEntId=GetLinkEntId(LinkEntN); int EntLinks=GetEntLinks(LinkEntN); int EntLinkWgtSum=0; for (int EntLinkN=0; EntLinkN<EntLinks; EntLinkN++){ const TSkyGridEntLinkCtx& EntLinkCtx=GetEntLinkCtx(LinkEntN, EntLinkN); if (EntLinkCtx.Tm>=MnTm){ EntLinkWgtSum+=EntLinkCtx.LinkWgt;} } DstEntIdLinkWgtH.AddDat(DstEntId, EntLinkWgtSum); AllLinkWgtSum+=EntLinkWgtSum; } LinkWgtDstEntIdPrV.Clr(); DstEntIdLinkWgtH.GetDatKeyPrV(LinkWgtDstEntIdPrV); LinkWgtDstEntIdPrV.Sort(false); // cut long-tail if ((TopWgtSumPrc>0.0)&&(LinkWgtDstEntIdPrV.Len()>0)){ int TopLinkWgt=LinkWgtDstEntIdPrV[0].Val1; if (TopLinkWgt>(3*AllLinkWgtSum)/LinkWgtDstEntIdPrV.Len()){ double CutWgtSum=AllLinkWgtSum*(1-TopWgtSumPrc); int LastValN=LinkWgtDstEntIdPrV.Len()-1; while ((LastValN>0)&&(CutWgtSum>0)){ CutWgtSum-=LinkWgtDstEntIdPrV[LastValN].Val1; LastValN--; } LinkWgtDstEntIdPrV.Trunc(LastValN+1); } } }
void TSkyGridBs::GetLinkWgtDstEntIdPrVDiff( const TIntPrV& OldLinkWgtDstEntIdPrV, const TIntPrV& NewLinkWgtDstEntIdPrV, TIntPrV& NegDiffLinkWgtDstEntIdPrV, TIntPrV& PosDiffLinkWgtDstEntIdPrV){ TIntIntH DstEntIdToLinkWgtH; // set previous-vector for (int WordN=0; WordN<NewLinkWgtDstEntIdPrV.Len(); WordN++){ int LinkWgt=NewLinkWgtDstEntIdPrV[WordN].Val1; int DstEntId=NewLinkWgtDstEntIdPrV[WordN].Val2; DstEntIdToLinkWgtH.AddDat(DstEntId, LinkWgt); } // diff current-vector for (int WordN=0; WordN<OldLinkWgtDstEntIdPrV.Len(); WordN++){ int LinkWgt=OldLinkWgtDstEntIdPrV[WordN].Val1; int DstEntId=OldLinkWgtDstEntIdPrV[WordN].Val2; int CurLinkWgt=DstEntIdToLinkWgtH.AddDat(DstEntId); DstEntIdToLinkWgtH.AddDat(DstEntId, CurLinkWgt-LinkWgt); } // extract vector TIntPrV _DiffLinkWgtDstEntIdPrV; DstEntIdToLinkWgtH.GetDatKeyPrV(_DiffLinkWgtDstEntIdPrV); // clean zeros TIntPrV DiffLinkWgtDstEntIdPrV(_DiffLinkWgtDstEntIdPrV.Len(), 0); for (int EntN=0; EntN<_DiffLinkWgtDstEntIdPrV.Len(); EntN++){ int LinkWgt=_DiffLinkWgtDstEntIdPrV[EntN].Val1; if (LinkWgt!=0){ DiffLinkWgtDstEntIdPrV.Add(_DiffLinkWgtDstEntIdPrV[EntN]);} } // positive-vector DiffLinkWgtDstEntIdPrV.Sort(true); NegDiffLinkWgtDstEntIdPrV=DiffLinkWgtDstEntIdPrV; // negative-vector DiffLinkWgtDstEntIdPrV.Sort(false); PosDiffLinkWgtDstEntIdPrV=DiffLinkWgtDstEntIdPrV; }
// Test the default constructor TEST(TIntIntH, DefaultConstructor) { TIntIntH TableInt; EXPECT_EQ(1,TableInt.Empty()); EXPECT_EQ(0,TableInt.Len()); EXPECT_EQ(0,TableInt.GetMxKeyIds()); }
void TNmObjBs::GetFqNmObjIdPrV( const TStr& TargetNmObjStr, TIntPrV& FqNmObjIdPrV) const { //printf("Searching %s ...", TargetNmObjStr.CStr()); // get target named-object-id int TargetNmObjId=GetNmObjId(TargetNmObjStr); // collect target named-object frequencies TIntIntH NmObjIdToFqH; // traverse target named-object documents int NmObjDocs=GetNmObjDocs(TargetNmObjId); for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocs; NmObjDocIdN++){ // get document-id int DocId=GetNmObjDocId(TargetNmObjId, NmObjDocIdN); // traverse named-object in document int DocNmObjs=GetDocNmObjs(DocId); for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){ // get named-object & frequency int NmObjId; int TermFq; GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq); // increment named-object document frequency NmObjIdToFqH.AddDat(NmObjId)++; } } // get & sort frequency table FqNmObjIdPrV.Clr(); NmObjIdToFqH.GetDatKeyPrV(FqNmObjIdPrV); FqNmObjIdPrV.Sort(false); }
///////////////////////////////////////////////// // SkyGrid-Base void TSkyGridBs::GetSorted_DocsEntIdPrV(TIntPrV& DocsEntIdPrV){ TIntIntH EntIdToDocsH; for (int EntId=0; EntId<GetEnts(); EntId++){ int Docs=GetEnt(EntId).GetDocIds(); EntIdToDocsH.AddDat(EntId, Docs); } DocsEntIdPrV.Clr(); EntIdToDocsH.GetDatKeyPrV(DocsEntIdPrV); DocsEntIdPrV.Sort(false); }
// Reads node ids from the whitespace-separated file InFNmNodes.
// For every parsed line whose first field is an integer, that integer is
// stored under the next consecutive index (0, 1, 2, ...); lines whose first
// field is not an integer are skipped and consume no index.
// Returns the resulting index -> node-id table.
TIntH LoadNodeList(TStr InFNmNodes) {
  TSsParser Parser(InFNmNodes, ssfWhiteSep, true, true, true);
  TIntIntH IdxToNIdH;
  int NextIdx = 0;
  int NId;
  while (Parser.Next()) {
    if (!Parser.GetInt(0, NId)) { continue; }
    IdxToNIdH.AddDat(NextIdx, NId);
    NextIdx++;
  }
  return IdxToNIdH;
}
void TCycBs::_SaveTaxonomyTxt(FILE* fOut, const int& Lev, TIntPrV& RelIdVIdPrV, TIntIntH& VIdToLevH){ for (int VidN=0; VidN<RelIdVIdPrV.Len(); VidN++){ int FromRelId=RelIdVIdPrV[VidN].Val1; int SrcVId=RelIdVIdPrV[VidN].Val2; TStr SrcVNm=GetVNm(SrcVId); TCycVrtx& SrcVrtx=GetVrtx(SrcVId); if (!SrcVrtx.IsFlag(cvfHumanOk)){continue;} TStr FlagStr=SrcVrtx.GetFlagStr(); if (FromRelId==-1){ if (Lev>0){fprintf(fOut, "===upper");} else {fprintf(fOut, "===lower");} fprintf(fOut, "=======================================================\n"); fprintf(fOut, "%s - %s\n", SrcVNm.CStr(), FlagStr.CStr()); } else { TStr FromRelNm=GetVNm(FromRelId); fprintf(fOut, "%*c[%s] --> %s\n", (Lev-1)*5, ' ', FromRelNm.CStr(), SrcVNm.CStr()); } TIntPrV UpRelIdVIdPrV; for (int EdgeN=0; EdgeN<SrcVrtx.GetEdges(); EdgeN++){ TCycEdge& Edge=SrcVrtx.GetEdge(EdgeN); int RelId=Edge.GetRelId(); int DstVId=Edge.GetDstVId(); TStr RelNm=GetVNm(RelId); TStr DstVNm=GetVNm(DstVId); if (Lev>0){ // upper taxonomy if ((RelNm=="#$isa")||(RelNm=="#$genls")){ if (!VIdToLevH.IsKey(DstVId)){ VIdToLevH.AddDat(DstVId, Lev+1); UpRelIdVIdPrV.Add(TIntPr(RelId, DstVId)); } } } else { // lower taxonomy if ((RelNm=="~#$isa")||(RelNm=="~#$genls")){ if (!VIdToLevH.IsKey(DstVId)){ VIdToLevH.AddDat(DstVId, Lev-1); UpRelIdVIdPrV.Add(TIntPr(RelId, DstVId)); } } } } // recursive call if (Lev>0){ _SaveTaxonomyTxt(fOut, Lev+1, UpRelIdVIdPrV, VIdToLevH); } else { _SaveTaxonomyTxt(fOut, Lev-1, UpRelIdVIdPrV, VIdToLevH); } } }
void TCycBs::SaveTaxonomyTxt(const TStr& FNm){ TFOut FOut(FNm); FILE* fOut=FOut.GetFileId(); for (int VId=0; VId<GetVIds(); VId++){ printf("%d/%d (%.1f%%)\r", 1+VId, GetVIds(), 100.0*(1+VId)/GetVIds()); //if (VId>10){break;} // upper taxonomy {int Lev=0; TIntIntH VIdToLevH; VIdToLevH.AddDat(VId, Lev); TIntPrV UpRelIdVIdPrV; UpRelIdVIdPrV.Add(TIntPr(-1, VId)); _SaveTaxonomyTxt(fOut, Lev+1, UpRelIdVIdPrV, VIdToLevH);} // lower taxonomy {int Lev=0; TIntIntH VIdToLevH; VIdToLevH.AddDat(VId, Lev); TIntPrV UpRelIdVIdPrV; UpRelIdVIdPrV.Add(TIntPr(-1, VId)); _SaveTaxonomyTxt(fOut, Lev-1, UpRelIdVIdPrV, VIdToLevH);} } printf("\n"); }
// Counts edges of Graph with respect to the core labelling in `out`
// (node-id -> label, label 1 is treated as "core") and normalises the count
// by coresize*(coresize-1)/2.
//   type == 1 : an edge counts when at least one endpoint has label 1
//   otherwise : an edge counts only when both endpoints have label 1
// Every endpoint that is looked up must be a key of `out`.
// NOTE(review): coresize of 0 or 1 makes the denominator zero - confirm that
// callers never pass such a value.
double BorgattiEverettMeasure(PUNGraph& Graph, TIntIntH& out, double coresize, int type) {
  const bool AnyEndpointP = (type == 1);
  double CountedEdges = 0.0;
  for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
    const int SrcNId = EI.GetSrcNId();
    const int DstNId = EI.GetDstNId();
    bool CountP;
    if (AnyEndpointP) {
      CountP = (out.GetDat(SrcNId) == 1 || out.GetDat(DstNId) == 1);
    } else {
      CountP = (out.GetDat(SrcNId) == 1 && out.GetDat(DstNId) == 1);
    }
    if (CountP) {
      CountedEdges += 1;
    }
  }
  const double CorePairs = ((coresize*coresize)-coresize)/2;
  return CountedEdges/CorePairs;
}
// Scores a core/periphery labelling `out` (node-id -> 1 for core, 0 for
// periphery) of Graph as the sum of two terms:
//   core quality      = core-core edges / possible core pairs
//   periphery quality = (possible periphery pairs - periphery-periphery edges)
//                       / possible periphery pairs
// where "possible pairs" of a group of size s is s*(s-1)/2, the core size is
// `coresize` and the periphery size is GetNodes()-coresize. Self-loops are
// not counted in either group.
double PearsonCorrelation(PUNGraph& Graph, TIntIntH& out, int coresize) {
  int CoreEdges = 0;
  int PeripheryEdges = 0;
  for (TUNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
    const int SrcNId = EI.GetSrcNId();
    const int DstNId = EI.GetDstNId();
    if (out.GetDat(SrcNId)==1 && out.GetDat(DstNId)==1 && SrcNId!=DstNId) {
      CoreEdges++;
    } else if (out.GetDat(SrcNId)==0 && out.GetDat(DstNId)==0 && SrcNId!=DstNId) {
      PeripheryEdges++;
    }
  }

  const double CoreSize = static_cast<double>(coresize);
  const double CorePairs = ((CoreSize*CoreSize)-CoreSize)/2;
  const double CoreQuality = static_cast<double>(CoreEdges)/CorePairs;

  const int PeripheryNodes = Graph->GetNodes()-coresize;
  const double PeripherySize = static_cast<double>(PeripheryNodes);
  const double PeripheryPairs = ((PeripherySize*PeripherySize)-PeripherySize)/2;
  const double PeripheryQuality =
    (PeripheryPairs - static_cast<double>(PeripheryEdges))/PeripheryPairs;

  return CoreQuality+PeripheryQuality;
}
// hash table benchmark with integer keys void HashBench(const int& n) { TIntIntH TableInt; float ft0, ft1; int x; int i; int Found; int NotFound; int Id; // build the hash table ft0 = GetCPUTime(); for (i = 0; i < n; i++) { x = (int) (drand48() * 100000000); TableInt.AddDat(x,0); } printf("hash: size %d\n", TableInt.Len()); ft1 = GetCPUTime(); printf("hash: %7.3fs inserting %d numbers\n",ft1-ft0,i); // search the hash table ft0 = GetCPUTime(); Found = 0; NotFound = 0; for (i = 0; i < n; i++) { x = (int) (drand48() * 100000000); Id = TableInt.GetKeyId(x); if (Id < 0) { NotFound++; } else { Found++; } } printf("hash: found %d, notfound %d\n", Found, NotFound); ft1 = GetCPUTime(); printf("hash: %7.3fs searching %d numbers\n",ft1-ft0,i); }
void LearnEmbeddings(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize, int& Iter, bool& Verbose, TIntFltVH& EmbeddingsHV) { TIntIntH RnmH; TIntIntH RnmBackH; int64 NNodes = 0; //renaming nodes into consecutive numbers for (int i = 0; i < WalksVV.GetXDim(); i++) { for (int64 j = 0; j < WalksVV.GetYDim(); j++) { if ( RnmH.IsKey(WalksVV(i, j)) ) { WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j)); } else { RnmH.AddDat(WalksVV(i,j),NNodes); RnmBackH.AddDat(NNodes,WalksVV(i, j)); WalksVV(i, j) = NNodes++; } } } TIntV Vocab(NNodes); LearnVocab(WalksVV, Vocab); TIntV KTable(NNodes); TFltV UTable(NNodes); TVVec<TFlt, int64> SynNeg; TVVec<TFlt, int64> SynPos; TRnd Rnd(time(NULL)); InitPosEmb(Vocab, Dimensions, Rnd, SynPos); InitNegEmb(Vocab, Dimensions, SynNeg); InitUnigramTable(Vocab, KTable, UTable); TFltV ExpTable(TableSize); double Alpha = StartAlpha; //learning rate #pragma omp parallel for schedule(dynamic) for (int i = 0; i < TableSize; i++ ) { double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision); ExpTable[i] = TMath::Power(TMath::E, Value); } int64 WordCntAll = 0; // op RS 2016/09/26, collapse does not compile on Mac OS X //#pragma omp parallel for schedule(dynamic) collapse(2) for (int j = 0; j < Iter; j++) { #pragma omp parallel for schedule(dynamic) for (int64 i = 0; i < WalksVV.GetXDim(); i++) { TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable, WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos); } } if (Verbose) { printf("\n"); fflush(stdout); } for (int64 i = 0; i < SynPos.GetXDim(); i++) { TFltV CurrV(SynPos.GetYDim()); for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); } EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV); } }
// Splits the nodes of Graph into a core and a periphery by a single pass over
// the nodes in descending order of degree.
// Z starts at half the degree sum and, when the k-th node (1-based) of degree
// d is added, is updated as Z = Z + k - 1 - d. The core size is the k for
// which Z is smallest (the first such k on ties).
// `out` receives, for every node id, 1 if the node is among the first
// core-size nodes of the degree-sorted list and 0 otherwise.
// Returns the chosen core size.
int FastCorePeriphery(PUNGraph& Graph, TIntIntH& out) {
  // node-id -> degree, plus the running objective
  TIntIntH DegH;
  double Z=0;
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    const int Deg = NI.GetDeg();
    const int NId = NI.GetId();
    Z += Deg;
    DegH.AddDat(NId,Deg);
  }
  Z = Z/2;

  // highest degree first; afterwards key id k addresses the k-th node
  DegH.SortByDat(false);

  // scan all prefix sizes and remember the best one
  double Zbest = 99999900000000000;
  int BestSize = 0;
  for (int k=0; k<DegH.Len(); k++) {
    const int Size = k+1;
    Z = Z + Size - 1 - DegH[k];
    if (Z < Zbest) { // or <=
      Zbest = Z;
      BestSize = Size;
    }
  }

  // label the first BestSize nodes as core
  int Rank = 0;
  for (THashKeyDatI<TInt, TInt> it = DegH.BegI(); !it.IsEnd(); it++) {
    const int CoreP = (Rank < BestSize) ? 1 : 0;
    out.AddDat(it.GetKey(), CoreP);
    Rank++;
  }
  return BestSize;
}
void TGreedyAlg::generateCascades(const int& noCasacdes, const double& pInit, const double& p, const double& q) { int noNodes = groundTruthGraph->GetNodes(); // printf("Generating cascade for graph with noNodes = %d:\n\n", noNodes); if (noNodes == 0) { return; } // set random seed TInt::Rnd.Randomize(); for (int casacdeI = 0; casacdeI < noCasacdes; casacdeI++) { TCascade cascade; double globalTime = 0; int noActiveNodes = 0; int noSusceptibleNodes = 0; int noInactiveNodes = 0; TIntIntH nodeStates; // flip biased coin for each node to collect seeds for (TKColourNet::TNodeI NI = groundTruthGraph->BegNI(); NI < groundTruthGraph->EndNI(); NI++) { const int nodeId = NI.GetId(); double flipResult = TInt::Rnd.GetUniDev(); // printf("nodeId = %d, flipResult = %f\n", nodeId, (float) flipResult); if (flipResult <= pInit) { nodeStates.AddDat(nodeId) = activeState; // printf("##### ADD TO CASCADE: nodeId = %d, globalTime = %f #####\n", nodeId, globalTime); cascade.Add(nodeId, globalTime); noActiveNodes++; } else { nodeStates.AddDat(nodeId) = susceptibleState; noSusceptibleNodes++; } } globalTime++; while (noActiveNodes > 0) { // printf("\n***** noActiveNodes = %d, noSusceptibleNodes = %d, noInactiveNodes = %d, globalTime = %f *****\n\n", noActiveNodes, noSusceptibleNodes, noInactiveNodes, (float) globalTime); const TIntIntH beginningNodeStates = nodeStates; // for each node in the graph for (TKColourNet::TNodeI NI = groundTruthGraph->BegNI(); NI < groundTruthGraph->EndNI(); NI++) { const int nodeId = NI.GetId(); const int nodeColourId = NI.GetDat().getColourId(); const int nodeState = beginningNodeStates.GetDat(nodeId); // printf("nodeId = %d, nodeColourId = %d, nodeState = %d\n", nodeId, nodeColourId, nodeState); // if node is active, infect susceptible child nodes with probability pij if (nodeState == activeState) { const TKColourNet::TNodeI LNI = groundTruthGraph->GetNI(nodeId); for (int e = 0; e < LNI.GetOutDeg(); e++) { const int childNodeId = LNI.GetOutNId(e); const int 
childNodeColourId = LNI.GetOutNDat(e).getColourId(); const int childNodeState = nodeStates.GetDat(childNodeId); // printf("childNodeId = %d, childNodeColourId = %d, childNodeState = %d\n", childNodeId, childNodeColourId, childNodeState); if (childNodeState == susceptibleState) { double probablityInfection = (nodeColourId == childNodeColourId) ? p : q; double flipResult = TInt::Rnd.GetUniDev(); // printf("childNodeId = %d, probabilityInfection = %f, flipResult = %f\n", childNodeId, (float) probablityInfection, (float) flipResult); // infection occurred: childNode goes from susceptible -> active if (flipResult <= probablityInfection) { nodeStates[childNodeId] = activeState; // printf("##### ADD TO CASCADE: nodeId = %d, globalTime = %f #####\n", childNodeId, globalTime); cascade.Add(childNodeId, globalTime); noActiveNodes++; noSusceptibleNodes--; } } } // main node goes from active -> inactive nodeStates[nodeId] = inactiveState; noActiveNodes--; noInactiveNodes++; } } globalTime++; } addCascade(cascade); } }
// Table manipulations
// End-to-end exercise of TIntIntH: bulk insert, lookup by key / iterator /
// key id, deletion, assignment, save+load through a file, random deletion
// until empty, and Clr(). Every stored pair has the form (key, key+1), which
// each verification pass checks, and key/value sums are compared against the
// totals accumulated while inserting.
// Side effect: writes the file "test.hashint.dat" in the working directory.
TEST(TIntIntH, ManipulateTable) {
  const int64 NElems = 1000000;
  int DDist = 10;                          // step between deleted keys
  const char *FName = "test.hashint.dat";  // scratch file for Save/Load
  TIntIntH TableInt;                       // table under test
  TIntIntH TableInt1;                      // target of the assignment test
  TIntIntH TableInt2;                      // target of the load test
  int i;
  int d;
  int n;
  int Id;
  int Key;
  int64 KeySumVal;                         // sum of all inserted keys
  int64 DatSumVal;                         // sum of all inserted values
  int64 KeySum;                            // per-pass accumulators
  int64 DatSum;
  int64 KeySumDel;                         // sums over deleted entries
  int64 DatSumDel;
  int DelCount;
  int Count;

  // add table elements
  // keys are generated by repeatedly stepping by d modulo NElems; the sum and
  // length checks below require that this yields each of 0..NElems-1 once
  // (presumably Prime() returns a step coprime with NElems - TODO confirm)
  d = Prime(NElems);
  n = d;
  KeySumVal = 0;
  DatSumVal = 0;
  for (i = 0; i < NElems; i++) {
    TableInt.AddDat(n,n+1);
    KeySumVal += n;
    DatSumVal += (n+1);
    //printf("add %d %d\n", n, n+1);
    n = (n + d) % NElems;
  }
  EXPECT_EQ(0,TableInt.Empty());
  EXPECT_EQ(NElems,TableInt.Len());

  // keys must sum to 0+1+...+(NElems-1), values to 1+2+...+NElems
  EXPECT_EQ(0,(NElems-1)*(NElems)/2 - KeySumVal);
  EXPECT_EQ(0,(NElems)*(NElems+1)/2 - DatSumVal);

  // verify elements by successive keys
  KeySum = 0;
  DatSum = 0;
  for (i = 0; i < NElems; i++) {
    Id = TableInt.GetKeyId(i);
    EXPECT_EQ(1,Id >= 0);
    Key = TableInt.GetKey(Id);
    EXPECT_EQ(0,TableInt.GetDat(Key)-Key-1);
    KeySum += Key;
    DatSum += TableInt.GetDat(Key);
  }
  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by distant keys
  // same stepping scheme as the insert loop, but from a different start
  KeySum = 0;
  DatSum = 0;
  n = Prime(d);
  for (i = 0; i < NElems; i++) {
    Id = TableInt.GetKeyId(n);
    EXPECT_EQ(1,Id >= 0);
    Key = TableInt.GetKey(Id);
    EXPECT_EQ(0,TableInt.GetDat(Key)-Key-1);
    KeySum += Key;
    DatSum += TableInt.GetDat(Key);
    n = (n + d) % NElems;
  }
  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  for (TIntIntH::TIter It = TableInt.BegI(); It < TableInt.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    KeySum += It.GetKey();
    DatSum += It.GetDat();
  }
  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by key index
  KeySum = 0;
  DatSum = 0;
  Id = TableInt.FFirstKeyId();
  while (TableInt.FNextKeyId(Id)) {
    EXPECT_EQ(1,Id >= 0);
    Key = TableInt.GetKey(Id);
    EXPECT_EQ(0,TableInt.GetDat(Key)-Key-1);
    KeySum += Key;
    DatSum += TableInt.GetDat(Key);
  }
  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // delete elements
  // removes every DDist-th key (0, DDist, 2*DDist, ...) and records what was
  // removed so the remaining sums can be predicted
  DelCount = 0;
  KeySumDel = 0;
  DatSumDel = 0;
  for (n = 0; n < NElems; n += DDist) {
    Id = TableInt.GetKeyId(n);
    //printf("del %d %d %d\n", n, Id, (int) TableInt[Id]);
    KeySumDel += n;
    DatSumDel += TableInt[Id];
    TableInt.DelKeyId(Id);
    DelCount++;
  }
  EXPECT_EQ(0,TableInt.Empty());
  EXPECT_EQ(NElems-DelCount,TableInt.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt.BegI(); It < TableInt.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }
  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // assignment
  TableInt1 = TableInt;
  EXPECT_EQ(0,TableInt1.Empty());
  EXPECT_EQ(NElems-DelCount,TableInt1.Len());

  // verify elements by iterator (assigned copy)
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt1.BegI(); It < TableInt1.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }
  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // saving and loading
  // each stream lives in its own scope so it is destroyed before the next
  // step uses the file
  {
    TFOut FOut(FName);
    TableInt.Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    TableInt2.Load(FIn);
  }

  EXPECT_EQ(NElems-DelCount,TableInt2.Len());

  // verify elements by iterator (loaded copy)
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt2.BegI(); It < TableInt2.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }
  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // remove all elements
  // Count still holds the number of remaining entries from the pass above;
  // one randomly chosen entry is deleted per iteration
  for (i = 0; i < Count; i++) {
    Id = TableInt.GetRndKeyId(TInt::Rnd, 0.5);
    TableInt.DelKeyId(Id);
  }
  EXPECT_EQ(0,TableInt.Len());
  EXPECT_EQ(1,TableInt.Empty());

  // verify elements by iterator (must visit nothing)
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt.BegI(); It < TableInt.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }
  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);

  // clear the table
  TableInt1.Clr();
  EXPECT_EQ(0,TableInt1.Len());
  EXPECT_EQ(1,TableInt1.Empty());

  // verify elements by iterator (must visit nothing)
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt1.BegI(); It < TableInt1.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }
  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);
}
// Loads a bag-of-words document base from SVM-light style text files.
//   DocDefFNm    - optional document definition file (used if non-empty and
//                  existing)
//   WordDefFNm   - optional word definition file (used if non-empty and
//                  existing)
//   TrainDataFNm - optional training data file; its documents are added to
//                  TrainDIdV
//   TestDataFNm  - optional test data file; its documents are added to
//                  TestDIdV
//   MxDocs       - currently ignored (see TODO below)
// Each data line starts with an integer class value (-1 maps to category
// "-1", anything else to "+1") followed by "WId:Weight" entries. Words that
// occur in the data (or lie below the largest seen word id) but were not
// defined are added afterwards under generated names.
// Returns the populated document base after AssertOk().
PBowDocBs TBowFl::LoadSvmLightTxt(
 const TStr& DocDefFNm, const TStr& WordDefFNm,
 const TStr& TrainDataFNm, const TStr& TestDataFNm,
 const int& MxDocs){
  //TODO: use MxDocs
  // prepare document set
  PBowDocBs BowDocBs=TBowDocBs::New();
  // the two categories are registered up front: "-1" first, then "+1"
  int MOneCId=BowDocBs->CatNmToFqH.AddKey("-1");
  int POneCId=BowDocBs->CatNmToFqH.AddKey("+1");
  // document definition
  bool DocDefP=false;
  if (!DocDefFNm.Empty()&&(TFile::Exists(DocDefFNm))){
    // expected grammar: (DId ':' "DocNm" <eoln>)*
    PSIn SIn=TFIn::New(DocDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syInt, syEof);
    while (Lx.Sym==syInt){
      int DId=Lx.Int;
      Lx.GetSym(syColon);
      Lx.GetSym(syQStr); TStr DocNm=Lx.Str;
      Lx.GetSym(syEoln);
      Lx.GetSym(syInt, syEof);
      // the id given in the file must equal the id the table assigns
      int NewDId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      EAssertR(DId==NewDId, "Document-Ids don't match.");
    }
    DocDefP=true;
  }
  // word definition
  if (!WordDefFNm.Empty()&&(TFile::Exists(WordDefFNm))){
    BowDocBs->WordStrToDescH.AddDat("Undef").Fq=0; // ... to have WId==0
    // expected grammar: ("WordStr" WId WordFq <eoln>)*
    PSIn SIn=TFIn::New(WordDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syQStr, syEof);
    while (Lx.Sym==syQStr){
      TStr WordStr=Lx.Str;
      Lx.GetSym(syInt); int WId=Lx.Int;
      Lx.GetSym(syInt); int WordFq=Lx.Int;
      Lx.GetSym(syEoln);
      Lx.GetSym(syQStr, syEof);
      // the id given in the file must equal the id the table assigns
      int NewWId=BowDocBs->WordStrToDescH.AddKey(WordStr);
      EAssertR(WId==NewWId, "Word-Ids don't match.");
      BowDocBs->WordStrToDescH[WId].Fq=WordFq;
    }
  }
  // train & test data
  // MxWId: largest word id seen in any data line (-1 = none yet)
  // WIdToFqH: number of data-line entries seen per word id
  int MxWId=-1; TIntIntH WIdToFqH;
  // train data
  if (!TrainDataFNm.Empty()){
    PSIn SIn=TFIn::New(TrainDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    // skip comment lines
    while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    // parse data lines
    while (Lx.Sym==syInt){
      // document
      // the provisional name is the current document count as a string; with
      // a document definition file the name must already exist, otherwise it
      // is added here
      TStr DocNm=TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP){
        DId=BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TrainDIdV.Add(DId);
      // category (class value)
      int CId=(Lx.Int==-1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add();
      IAssert(DId==BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV=TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV);
      IAssert(DId==BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym==syInt){
        int WId=Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq=Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId==-1){MxWId=WId;} else {MxWId=TInt::GetMx(MxWId, WId);}
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()){
        // rename the document to "D"+N if the comment reads 'docDesc=N';
        // the asserted id equality relies on the re-added key receiving the
        // id of the key deleted just before it
        TStr CmtStr=Lx.CmtStr;
        static TStr DocNmPrefixStr="docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)){
          TStr NewDocNm=
           TStr("D")+CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId=BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId==NewDId);
        }
      }
      SpV->Trunc();
      // skip empty/comment lines up to the next data line or end of file
      while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    }
  }
  // test data
  // same parsing as for the train data; documents go to TestDIdV instead
  if (!TestDataFNm.Empty()){
    PSIn SIn=TFIn::New(TestDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    while (Lx.Sym==syInt){
      // document
      TStr DocNm=TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP){
        DId=BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TestDIdV.Add(DId);
      // category (class value)
      int CId=(Lx.Int==-1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add();
      IAssert(DId==BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV=TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV);
      IAssert(DId==BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym==syInt){
        int WId=Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq=Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId==-1){MxWId=WId;} else {MxWId=TInt::GetMx(MxWId, WId);}
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()){
        // rename the document to "D"+N if the comment reads 'docDesc=N'
        TStr CmtStr=Lx.CmtStr;
        static TStr DocNmPrefixStr="docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)){
          TStr NewDocNm=
           TStr("D")+CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId=BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId==NewDId);
        }
      }
      SpV->Trunc();
      while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    }
  }
  // add missing words
  // every id in 0..MxWId that is not yet a word gets the generated name
  // "W<id>"; its frequency is set only if the id occurred in the data
  for (int WId=0; WId<=MxWId; WId++){
    if (!BowDocBs->IsWId(WId)){
      TStr WordStr=TInt::GetStr(WId, "W%d");
      int _WId=BowDocBs->AddWordStr(WordStr);
      IAssert(WId==_WId);
      TInt Fq;
      if (WIdToFqH.IsKeyGetDat(WId, Fq)){
        BowDocBs->PutWordFq(WId, Fq);
      }
    }
  }
  BowDocBs->AssertOk();
  return BowDocBs;
}
int FastCorePeripheryGC(PUNGraph& Graph, TIntIntH& out) { TIntH GroupNodes; // buildup cpntainer of group nodes int *NNodes = new int[Graph->GetNodes()]; // container of neighbouring nodes int NNodes_br = 0; TIntIntH nodes; TIntIntH nodesIds; double Z=0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { // Calculate and store the degrees of each node. int deg = NI.GetDeg(); int id = NI.GetId(); Z += deg; nodes.AddDat(id,deg); } Z = Z/2; nodes.SortByDat(false); // Then sort the nodes in descending order of degree, to get a list of nodes {v1, v2, . . . , vn}. int br1=0; for (THashKeyDatI<TInt,TInt> NI = nodes.BegI(); NI < nodes.EndI(); NI++) { nodesIds.AddDat(NI.GetKey(),NI.GetKey()); br1++; } double Zbest = 99999900000000000; //int kbest; //int olddeg; int br=0; for (int k=0; k<nodes.Len(); k++) { if (k<nodes.Len()-1) { if (nodes[k]==nodes[k+1]) { // go into same deg mode int kmin=-2; int knew=-1; while (kmin < 999999 && kmin !=-1 ) { int kind=-1; knew=k; kmin=999999; while(nodes[k]==nodes[knew] && knew < nodes.Len()-1) { int inter = Intersect(Graph->GetNI(nodesIds[knew]),NNodes,NNodes_br); int deg = nodes[knew]; //if (((((nodes.Len()-NNodes_br)*(nodes.Len()-NNodes_br)))-(nodes.Len()-NNodes_br))/2<(((br*br)-br)/2)) if ((deg-inter)<kmin && !GroupNodes.IsKey(nodesIds[knew])) { kmin = deg-inter; kind = knew; } knew++; } if (kind!=-1) { br++; Z = Z + br - 1 - nodes[kind]; if (Z < (Zbest)) { // or <= //if (olddeg>nodes[kind]) //olddeg = nodes[kind]; Zbest = Z; //kbest = br; int w = nodes[kind]; int id = nodesIds[kind]; GroupNodes.AddDat(id,w); NNodes[NNodes_br] = id; NNodes_br++; } else { break; } } } k=knew-1; } else { br++; Z = Z + br - 1 - nodes[k]; if (Z < (Zbest)) { // or <= //if (olddeg>nodes[k]) //olddeg = nodes[k]; Zbest = Z; //kbest = br; int w = nodes[k]; int id = nodesIds[k]; GroupNodes.AddDat(id,w); NNodes[NNodes_br] = id; NNodes_br++; } } } else { br++; Z = Z + br - 1 - nodes[k]; if (Z < Zbest) { // or <= //if (olddeg>nodes[k]) //olddeg 
= nodes[k]; Zbest = Z; //kbest = br; int w = nodes[k]; int id = nodesIds[k]; GroupNodes.AddDat(id,w); NNodes[NNodes_br] = id; NNodes_br++; } } } int cp = 0; br = 0; for (THashKeyDatI<TInt, TInt> it = nodes.BegI(); !it.IsEnd(); it++) { if (GroupNodes.IsKey(it.GetKey())) cp = 1; else cp = 0; out.AddDat(it.GetKey(), cp); br++; } /*for (THashKeyDatI<TInt, TInt> it = GroupNodes.BegI(); it < GroupNodes.EndI(); it++) { out.AddDat(it.GetKey(), 1); br++; }*/ //return kbest; return GroupNodes.Len(); }