/// Splits ChA in place into words and stores a pointer to the start of each word in WrdV.
/// Separator characters inside ChA are overwritten with 0, so every pointer in WrdV is a
/// null-terminated string that points into ChA's own buffer.
/// If SplitOnWs is true the only separator is ' '; otherwise every character for which
/// TCh::IsAlNum() is false separates words.
/// Empty words created by consecutive separators are dropped during the scan; the entry
/// added after the last separator is kept even when it is empty.
/// Returns the number of entries in WrdV.
int TStrUtil::SplitWords(TChA& ChA, TVec<char *>& WrdV, const bool& SplitOnWs) {
  WrdV.Clr(false);
  char *Cur = (char *) ChA.CStr();
  WrdV.Add(Cur);
  while (*Cur) {
    const bool IsSep = SplitOnWs ? (*Cur == ' ') : (! TCh::IsAlNum(*Cur));
    if (IsSep) {
      *Cur = 0;  // terminate the word that ends here
      // the word that just ended is empty: drop it before starting the next one
      if (! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
      WrdV.Add(Cur + 1);
    }
    Cur++;
  }
  return WrdV.Len();
}
// For every timestamp stored for the edge (u, v), appends a (timestamp, index) pair to
// ts_indices and a StarEdgeData(nbr, key) record to events, then advances index by one.
// Nothing is appended when HasEdges(u, v) is false.
void TempMotifCounter::AddStarEdgeData(TVec<TIntPair>& ts_indices, TVec<StarEdgeData>& events, int& index, int u, int v, int nbr, int key) {
  if (!HasEdges(u, v)) { return; }
  const TIntV& stamps = temporal_data_[u].GetDat(v);
  const int num_stamps = stamps.Len();
  for (int pos = 0; pos < num_stamps; pos++) {
    ts_indices.Add(TIntPair(stamps[pos], index));
    events.Add(StarEdgeData(nbr, key));
    index++;
  }
}
// For every timestamp stored for the edge (u, v), appends a (timestamp, index) pair to
// ts_indices and a TriadEdgeData(nbr, key1, key2) record to events, then advances index.
// Nothing is appended when HasEdges(u, v) is false.
void TempMotifCounter::AddTriadEdgeData(TVec<TriadEdgeData>& events, TVec<TIntPair>& ts_indices, int& index, int u, int v, int nbr, int key1, int key2) {
  if (!HasEdges(u, v)) { return; }
  const TIntV& stamps = temporal_data_[u].GetDat(v);
  const int num_stamps = stamps.Len();
  for (int pos = 0; pos < num_stamps; ++pos) {
    ts_indices.Add(TIntPair(stamps[pos], index));
    events.Add(TriadEdgeData(nbr, key1, key2));
    index++;
  }
}
/// Splits ChA in place on every occurrence of Ch and stores a pointer to the start of each
/// piece in WrdV. Each separator in ChA is overwritten with 0, so the pointers in WrdV are
/// null-terminated strings that point into ChA's own buffer.
/// When SkipEmpty is true, empty pieces (including a trailing one) are not reported.
/// Returns the number of entries in WrdV.
int TStrUtil::SplitOnCh(TChA& ChA, TVec<char *>& WrdV, const char& Ch, const bool& SkipEmpty) {
  WrdV.Clr(false);
  char *Cur = (char *) ChA.CStr();
  WrdV.Add(Cur);
  for (; *Cur != 0; Cur++) {
    if (*Cur != Ch) { continue; }
    *Cur = 0;  // terminate the piece that ends here
    if (SkipEmpty && ! WrdV.Empty() && *WrdV.Last() == 0) { WrdV.DelLast(); }
    WrdV.Add(Cur + 1);
  }
  // the piece after the last separator may be empty as well
  if (SkipEmpty && ! WrdV.Empty() && *WrdV.Last() == 0) { WrdV.DelLast(); }
  return WrdV.Len();
}
// Computes GraphId() for every graph in isoG, appends the ids to graphIds in the same
// order, and returns the smallest id. isoG must be non-empty (asserted).
uint64 TGraphEnumUtils::GetMinAndGraphIds(const TVec<PNGraph> &isoG, TVec<uint64> &graphIds) {
  IAssert(isoG.Len() > 0);
  uint64 smallestId = GraphId(isoG[0]);
  graphIds.Add(smallestId);
  const int numGraphs = isoG.Len();
  for (int g = 1; g < numGraphs; g++) {
    const uint64 id = GraphId(isoG[g]);
    graphIds.Add(id);
    if (id < smallestId) { smallestId = id; }
  }
  return smallestId;
}
int main(int argc, char *argv[]) { TDocBase *DocBase = new TDocBase; TChA Url = TChA("http://www.newyorktimes.com/news_story"); TSecTm Date = TSecTm::GetCurTm(); TChA Content = TChA("foo bar foo foo"); TVec<TChA> Links; Links.Add(TChA("http://www.google.com")); Links.Add(TChA("http://www.yahoo.com")); DocBase->AddDoc(Url, Date, Content, Links); printf("Number of documents: %d\n", DocBase->Len()); TDoc t; DocBase->GetDoc(0, t); TStr tUrl; t.GetUrl(tUrl); printf("URL: %s\n", tUrl.CStr()); TStrV l; t.GetLinks(l); printf("Link1: %s\n", l[0].CStr()); printf("Link2: %s\n", l[1].CStr()); { TFOut FOut("tmp.bin"); DocBase->Save(FOut); } printf("Save data successfully\n"); delete DocBase; TFIn FIn("tmp.bin"); printf("Load data successfully\n"); TDocBase *DocBase2 = new TDocBase; DocBase2->Load(FIn); printf("Number of documents: %d\n", DocBase2->Len()); TDoc t2; DocBase2->GetDoc(0, t2); TStr t2Url; t2.GetUrl(t2Url); printf("URL: %s\n", t2Url.CStr()); t2.GetLinks(l); printf("Link1: %s\n", l[0].CStr()); printf("Link2: %s\n", l[1].CStr()); delete DocBase2; return 0; }
// Draws the keywords of VizMapFrame that lie inside the current zoom rectangle.
// A keyword is skipped when its text rectangle intersects a rectangle already in
// PointNmRectV, when it is closer than Sparsity to an already drawn keyword, or when
// it has the same (lower-cased) text as an already drawn keyword closer than MnRptDist.
// The rectangle of every drawn keyword is appended to PointNmRectV.
void TVizMapContext::PaintKeyWds(PGks Gks, const int& KeyWdFontSize, TVec<TFltRect>& PointNmRectV) {
  // set font
  Gks->SetFont(TGksFont::New("ARIAL", KeyWdFontSize, ColorKeyWdFont));
  // prepare parameters: both thresholds scale with the shorter side of the zoom rectangle
  TFltRect ZoomRect = GetZoomRect();
  const double MnZoomRectSize = TFlt::GetMn(ZoomRect.GetXLen(), ZoomRect.GetYLen());
  const double MnRptDist = RelMnRptDist * MnZoomRectSize;
  const double Sparsity = RelSparsity * MnZoomRectSize;
  TVizMapKeyWdV OkKeyWdV;  // keywords drawn so far
  // start drawing the keywords
  int KeyWds = VizMapFrame->GetKeyWds();
  for (int KeyWdN = 0; KeyWdN < KeyWds; KeyWdN++) {
    PVizMapKeyWd KeyWd = VizMapFrame->GetKeyWd(KeyWdN);
    TStr KeyWdStr = KeyWd->GetKeyWdStr().GetLc();
    const double KeyWdX = KeyWd->GetKeyWdX();
    const double KeyWdY = KeyWd->GetKeyWdY();
    // keywords outside the zoom rectangle are ignored
    if (!ZoomRect.IsXYIn(KeyWdX, KeyWdY)) { continue; }
    // get coordinates in pixels
    const int X = GetScreenCoord(KeyWdX, ZoomRect.GetMnX(),
      ZoomRect.GetXLen(), Gks->GetWidth());
    const int Y = GetScreenCoord(KeyWdY, ZoomRect.GetMnY(),
      ZoomRect.GetYLen(), Gks->GetHeight());
    // calculate string position on the screen (text is centered on X, Y)
    const int HalfTxtWidth = Gks->GetTxtWidth(KeyWdStr) / 2;
    const int HalfTxtHeight = Gks->GetTxtHeight(KeyWdStr) / 2;
    TFltRect KeyWdRect(X - HalfTxtWidth, Y - HalfTxtHeight,
      X + HalfTxtWidth, Y + HalfTxtHeight);
    // should we draw it? look for the first already occupied rectangle it overlaps
    const int Rects = PointNmRectV.Len();
    int RectN = 0;
    while (RectN < Rects && !TFltRect::Intersection(KeyWdRect, PointNmRectV[RectN])) {
      RectN++;
    }
    if (RectN < Rects) { continue; }  // word overlaps, we skip it
    // check if it fits the sparsity and repetition constraint
    TFltV KeyWdCoodV = TFltV::GetV(KeyWdX, KeyWdY);
    const int OkKeyWds = OkKeyWdV.Len();
    bool KeyWdOkP = true;
    for (int OkKeyWdN = 0; OkKeyWdN < OkKeyWds; OkKeyWdN++) {
      PVizMapKeyWd OkKeyWd = OkKeyWdV[OkKeyWdN];
      TStr OkKeyWdStr = OkKeyWd->GetKeyWdStr().GetLc();
      const double Dist = TLinAlg::EuclDist(KeyWdCoodV, OkKeyWd->GetCoordV());
      // too close to any drawn keyword, or too close to a drawn keyword with the same text
      if (Dist < Sparsity || (OkKeyWdStr == KeyWdStr && Dist < MnRptDist)) {
        KeyWdOkP = false;
        break;
      }
    }
    if (!KeyWdOkP) { continue; }  // word repeats or is too dense, we skip it
    // draw it!
    Gks->PutTxt(KeyWdStr, X - HalfTxtWidth, Y - HalfTxtHeight);
    PointNmRectV.Add(KeyWdRect);
    OkKeyWdV.Add(KeyWd);
  }
}
// Returns every pair of data points that share a bucket in at least one band.
// Pairs are first collected as (smaller id, larger id) in a hash set, so a pair that
// collides in several bands is reported only once; the ids are then mapped to the
// corresponding vectors in DataV.
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllCandidatePairs() {
  THashSet<TPair<TInt, TInt> > CandidateIdPairs;
  for (int i=0; i<Bands; i++) {
    TVec<TIntV> BucketVV;
    SigBucketVHV[i].GetDatV(BucketVV);
    for (int j=0; j<BucketVV.Len(); j++) {
      // bind by reference: the bucket is only read, so copying it is wasted work
      const TIntV& BucketV = BucketVV[j];
      const int BucketLen = BucketV.Len();
      for (int k=0; k<BucketLen; k++) {
        for (int l=k+1; l<BucketLen; l++) {
          int First = BucketV[k], Second = BucketV[l];
          // normalize the order so (a, b) and (b, a) map to the same key
          if (First > Second) { int Temp = First; First = Second; Second = Temp; }
          CandidateIdPairs.AddKey(TPair<TInt, TInt> (First, Second));
        }
      }
    }
  }
  TVec<TPair<TFltV, TFltV> > CandidatePairs;
  // the final size is known, so reserve it once instead of growing repeatedly
  CandidatePairs.Gen(CandidateIdPairs.Len(), 0);
  int Ind = CandidateIdPairs.FFirstKeyId();
  while (CandidateIdPairs.FNextKeyId(Ind)) {
    TPair<TInt, TInt> IdPair = CandidateIdPairs[Ind];
    CandidatePairs.Add(TPair<TFltV, TFltV>(DataV[IdPair.GetVal1()], DataV[IdPair.GetVal2()]));
  }
  return CandidatePairs;
}
// DyNetML format, loads all the networks in the file TVec<PNGraph> LoadDyNetGraphV(const TStr& FNm) { TXmlLx XmlLx(TFIn::New(FNm), xspTruncate); TVec<PNGraph> GraphV; THashSet<TStr> NIdStr; while (XmlLx.GetSym()!=xsyEof) { if (XmlLx.Sym==xsySTag && XmlLx.TagNm=="network") { PNGraph G = TNGraph::New(); GraphV.Add(G); XmlLx.GetSym(); while (XmlLx.TagNm=="link") { TStr Str1, Val1, Str2, Val2; XmlLx.GetArg(0, Str1, Val1); XmlLx.GetArg(1, Str2, Val2); IAssert(Str1=="source" && Str2=="target"); NIdStr.AddKey(Val1); NIdStr.AddKey(Val2); const int src=NIdStr.GetKeyId(Val1); const int dst=NIdStr.GetKeyId(Val2); if (! G->IsNode(src)) { G->AddNode(src); } if (! G->IsNode(dst)) { G->AddNode(dst); } G->AddEdge(src, dst); XmlLx.GetSym(); } } } return GraphV; }
// initialize javascript void InitJs(const TQmParam& Param, const TQm::PBase& Base, const TStr& OnlyScriptNm, TVec<TQm::PScript>& ScriptV) { if (!OnlyScriptNm.Empty()) { TQm::InfoLog("Set limit to script " + OnlyScriptNm); } for (int JsN = 0; JsN < Param.JsParamV.Len(); JsN++) { const TQmParam::TJsParam& JsParam = Param.JsParamV[JsN]; // skip if required if (!OnlyScriptNm.Empty() && JsParam.Nm != OnlyScriptNm) { TQm::InfoLog("Skipping script " + JsParam.Nm); continue; } // otherwise continue with load TQm::InfoLog("Loading script " + JsParam.FNm.GetFMid() + "..."); try { // initialize javascript engine TVec<TQm::TJsFPath> JsFPathV; TQm::TJsFPath::GetFPathV(JsParam.AccessFPathV, JsFPathV); TQm::PScript Script = TQm::TScript::New(Base, JsParam.Nm, JsParam.FNm, JsParam.IncludeFPathV, JsFPathV); // remember the context ScriptV.Add(Script); // done TQm::InfoLog(" done"); } catch (PExcept& Except) { TQm::ErrorLog("Error loading script " + JsParam.FNm.GetFMid() + ":"); TQm::ErrorLog(" " + Except->GetMsgStr()); } } }
// Appends to NodeIds one (value, key) pair for every entry of NodeToModeMapping whose
// value equals ModeId, i.e. the pair's first component is the matching mode value and
// the second is the entry's key.
void TMultimodalGraphImplB::GetNodeIdsInMode(const int ModeId, TVec< TPair<TInt,TInt> >& NodeIds) const {
  THash<TInt,TInt>::TIter NI = NodeToModeMapping.BegI();
  while (NI < NodeToModeMapping.EndI()) {
    if (NI.GetDat() == ModeId) {
      NodeIds.Add(TPair<TInt,TInt>(NI.GetDat(), NI.GetKey()));
    }
    NI++;
  }
}
// to get first few eigenvectors void GetEigVec(const PUNGraph& Graph, const int& EigVecs, TFltV& EigValV, TVec<TFltV>& EigVecV) { const int Nodes = Graph->GetNodes(); // Lanczos TUNGraphMtx GraphMtx(Graph); int CalcVals = int(2*EigVecs); if (CalcVals > Nodes) { CalcVals = Nodes; } TFltVV EigVecVV; //while (EigValV.Len() < EigVecs && CalcVals < 10*EigVecs) { try { TSparseSVD::Lanczos(GraphMtx, EigVecs, 2*EigVecs, ssotFull, EigValV, EigVecVV, false); } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } if (EigValV.Len() < EigVecs) { printf(" ***TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } // CalcVals += EigVecs; //} TFltIntPrV EigValIdV; for (int i = 0; i < EigValV.Len(); i++) { EigValIdV.Add(TFltIntPr(EigValV[i], i)); } EigValIdV.Sort(false); EigValV.Sort(false); for (int v = 0; v < EigValIdV.Len(); v++) { // vector components are not sorted!!! EigVecV.Add(); EigVecVV.GetCol(EigValIdV[v].Val2, EigVecV.Last()); } IsAllValVNeg(EigVecV[0], true); }
int main() { TLSHash LSH(7, 7, DIM, TLSHash::EUCLIDEAN); LSH.Init(); TRnd Gen; Gen.Randomize(); TVec<TFltV> DataV; for (int i=0; i<1000000; i++) { TFltV Datum; for (int j=0; j<3; j++) { Datum.Add(Gen.GetUniDev()*2100); } DataV.Add(Datum); } LSH.AddV(DataV); TVec<TPair<TFltV, TFltV> > NeighborsV = LSH.GetAllCandidatePairs(); printf("Number of Candidates: %d\n", NeighborsV.Len()); NeighborsV = LSH.GetAllNearPairs(); printf("Number of Close Pairs: %d\n", NeighborsV.Len()); for (int i=0; i<NeighborsV.Len(); i++) { outputPoint(NeighborsV[i].GetVal1()); printf(" "); outputPoint(NeighborsV[i].GetVal2()); printf("\n"); } return 0; }
/// extract community affiliation from F_uc void TAGMFast::GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) { CmtyVV.Gen(NumComs, 0); TIntFltH CIDSumFH(NumComs); for (int c = 0; c < SumFV.Len(); c++) { CIDSumFH.AddDat(c, SumFV[c]); } CIDSumFH.SortByDat(false); for (int c = 0; c < NumComs; c++) { int CID = CIDSumFH.GetKey(c); TIntFltH NIDFucH(F.Len() / 10); TIntV CmtyV; IAssert(SumFV[CID] == CIDSumFH.GetDat(CID)); if (SumFV[CID] < Thres) { continue; } for (int u = 0; u < F.Len(); u++) { int NID = u; if (! NodesOk) { NID = NIDV[u]; } if (GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(u, CID)); } } NIDFucH.SortByDat(false); NIDFucH.GetKeyV(CmtyV); if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); } } if ( NumComs != CmtyVV.Len()) { printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len()); } }
// returns a set of clusters representing the separate types contained in the input cluster static TVec<TCluster> GetSingleTypeClusters(const TCluster& cluster, const THash<TInt,TVec<TInt> >& quotePages, THash<TInt,TWebpage> pageHash) { TVec<TCluster> clusters = TVec<TCluster>::TVec<TCluster>(); if (cluster.NoTypes() == 1) { clusters.Add(TCluster(cluster)); return clusters; } TCluster c; TVec<TInt> selfList; for (int i = 0; i < cluster.NoTypes(); i++) { selfList = TVec<TInt>::TVec<TInt>(); selfList.Add(cluster.quotes[i]); c = TCluster::TCluster(cluster.quotes[i], selfList, quotePages, pageHash); clusters.Add(c); } return clusters; }
/// Clique Percolation method communities void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) { printf("Clique Percolation Method\n"); TExeTm ExeTm; TVec<TIntV> MaxCliques; TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques); // op RS 2012/05/15, commented out next line, a parameter is missing, // creating a warning on OS X // printf("...%d cliques found\n"); // get clique overlap matrix (graph) PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1); printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges()); // connected components are communities TCnComV CnComV; TSnap::GetWccs(OverlapGraph, CnComV); NIdCmtyVV.Clr(false); TIntSet CmtySet; for (int c = 0; c < CnComV.Len(); c++) { CmtySet.Clr(false); for (int i = 0; i <CnComV[c].Len(); i++) { const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]]; CmtySet.AddKeyV(CliqueNIdV); } NIdCmtyVV.Add(); CmtySet.GetKeyV(NIdCmtyVV.Last()); NIdCmtyVV.Last().Sort(); } printf("done [%s].\n", ExeTm.GetStr()); }
void TFfGGen::GenFFGraphs(const double& FProb, const double& BProb, const TStr& FNm) { const int NRuns = 10; const int NNodes = 10000; TGStat::NDiamRuns = 10; //const double FProb = 0.35, BProb = 0.20; // ff1 //const double FProb = 0.37, BProb = 0.32; // ff2 //const double FProb = 0.37, BProb = 0.325; // ff22 //const double FProb = 0.37, BProb = 0.33; // ff3 //const double FProb = 0.37, BProb = 0.35; // ff4 //const double FProb = 0.38, BProb = 0.35; // ff5 TVec<PGStatVec> GAtTmV; TFfGGen FF(false, 1, FProb, BProb, 1.0, 0, 0); for (int r = 0; r < NRuns; r++) { PGStatVec GV = TGStatVec::New(tmuNodes, TGStat::AllStat()); FF.GenGraph(NNodes, GV, true); for (int i = 0; i < GV->Len(); i++) { if (i == GAtTmV.Len()) { GAtTmV.Add(TGStatVec::New(tmuNodes, TGStat::AllStat())); } GAtTmV[i]->Add(GV->At(i)); } IAssert(GAtTmV.Len() == GV->Len()); } PGStatVec AvgStat = TGStatVec::New(tmuNodes, TGStat::AllStat()); for (int i = 0; i < GAtTmV.Len(); i++) { AvgStat->Add(GAtTmV[i]->GetAvgGStat(false)); } AvgStat->PlotAllVsX(gsvNodes, FNm, TStr::Fmt("Forest Fire: F:%g B:%g (%d runs)", FProb, BProb, NRuns)); AvgStat->Last()->PlotAll(FNm, TStr::Fmt("Forest Fire: F:%g B:%g (%d runs)", FProb, BProb, NRuns)); }
int GetWeightedPageRankMP1(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) { if (!Graph->IsFltAttrE(Attr)) return -1; TFltV Weights = Graph->GetFltAttrVecE(Attr); int mxid = Graph->GetMxNId(); TFltV OutWeights(mxid); Graph->GetWeightOutEdgesV(OutWeights, Weights); /*for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { OutWeights[NI.GetId()] = Graph->GetWeightOutEdges(NI, Attr); }*/ /*TIntFltH Weights; for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { Weights.AddDat(NI.GetId(), Graph->GetWeightOutEdges(NI, Attr)); }*/ const int NNodes = Graph->GetNodes(); TVec<TNEANet::TNodeI> NV; //const double OneOver = 1.0/double(NNodes); PRankH.Gen(NNodes); for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NV.Add(NI); PRankH.AddDat(NI.GetId(), 1.0/NNodes); //IAssert(NI.GetId() == PRankH.GetKey(PRankH.Len()-1)); } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; TmpV[j] = 0; for (int e = 0; e < NI.GetInDeg(); e++) { const int InNId = NI.GetInNId(e); const TFlt OutWeight = OutWeights[InNId]; int EId = Graph->GetEId(InNId, NI.GetId()); const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)]; if (OutWeight > 0) { TmpV[j] += PRankH.GetDat(InNId) * Weight / OutWeight; } } TmpV[j] = C*TmpV[j]; // Berkhin (the correct way of doing it) //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph } double diff=0, sum=0, NewVal; #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000) for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; } const double Leaked = (1.0-sum) / double(NNodes); #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000) for (int i = 0; i < PRankH.Len(); i++) { // re-instert leaked PageRank NewVal = TmpV[i] + Leaked; // Berkhin //NewVal = TmpV[i] / sum; // iGraph diff += fabs(NewVal-PRankH[i]); PRankH[i] = 
NewVal; } if (diff < Eps) { break; } } return 0; }
void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) { const int Nodes = Graph->GetNodes(); SngValV.Clr(); LeftSV.Clr(); RightSV.Clr(); TFltVV LSingV, RSingV; if (Nodes < 100) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TIntH NodeIdH; // create adjecency matrix (1-based) for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } } else { // Lanczos TNGraphMtx GraphMtx(Graph); TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV); //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt"); //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX } TFltIntPrV SngValIdV; for (int i = 0; i < SngValV.Len(); i++) { SngValIdV.Add(TFltIntPr(SngValV[i], i)); } SngValIdV.Sort(false); SngValV.Sort(false); for (int v = 0; v < SngValIdV.Len(); v++) { LeftSV.Add(); LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last()); RightSV.Add(); RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last()); } IsAllValVNeg(LeftSV[0], true); IsAllValVNeg(RightSV[0], true); }
// Appends to TupleList every run of n consecutive entries of TokenIds, in order of
// their start position. Nothing is appended when TokenIds has fewer than n entries.
void TTokenizerUtil::ToSequencesForLength(const TStrV& TokenIds, TVec<TStrV >& TupleList, int n) {
  const int NumTuples = TokenIds.Len() - n + 1;
  for (int Start = 0; Start < NumTuples; Start++) {
    TupleList.Add();
    TStrV& Tuple = TupleList.Last();
    for (int Off = 0; Off < n; Off++) {
      Tuple.Add(TokenIds[Start + Off]);
    }
  }
}
// Rebuilds ResV from MainV: one result item per input item, carrying the item's Val
// and the constant 1 as second constructor argument. Key is not used.
void TIndex::TQmGixSumWithoutFqMerger<TQmGixItem, TQmGixResItem>::Def(const TQmGixKey& Key, TVec<TQmGixItem>& MainV, TVec<TQmGixResItem>& ResV) const {
  const int Items = MainV.Len();
  ResV.Gen(Items, 0);  // reserve room for all items, start empty
  for (int ItemN = 0; ItemN < Items; ItemN++) {
    ResV.Add(TQmGixResItem(MainV[ItemN].Val, 1));
  }
}
// Merges JoinV into MainV with a two-pointer walk over both vectors. Items that compare
// neither less nor greater are combined into one item with the first item's Key and the
// sum of both Dat fields; all other items are carried over unchanged.
// NOTE(review): the walk presumably expects both inputs to be sorted - confirm with callers.
void TIndex::TQmGixSumMerger<TQmGixItem>::Union(TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV) const {
  TVec<TQmGixItem> MergedV;
  const int MainLen = MainV.Len();
  const int JoinLen = JoinV.Len();
  int MainN = 0;
  int JoinN = 0;
  while (MainN < MainLen && JoinN < JoinLen) {
    const TQmGixItem& MainItem = MainV.GetVal(MainN);
    const TQmGixItem& JoinItem = JoinV.GetVal(JoinN);
    if (MainItem < JoinItem) {
      MergedV.Add(MainItem);
      MainN++;
    } else if (MainItem > JoinItem) {
      MergedV.Add(JoinItem);
      JoinN++;
    } else {
      MergedV.Add(TQmGixItem(MainItem.Key, MainItem.Dat + JoinItem.Dat));
      MainN++;
      JoinN++;
    }
  }
  // at most one of the two inputs still has items left
  for (; MainN < MainLen; MainN++) { MergedV.Add(MainV.GetVal(MainN)); }
  for (; JoinN < JoinLen; JoinN++) { MergedV.Add(JoinV.GetVal(JoinN)); }
  MainV = MergedV;
}
// backup all profiles void TFolderBackup::CreateBackup(TVec<TBackupLogInfo>& BackupLogInfo) { for (int KeyId = ProfileH.FFirstKeyId(); ProfileH.FNextKeyId(KeyId);) { const TStr ProfileName = ProfileH.GetKey(KeyId); TBackupLogInfo Info = CreateBackup(ProfileName); BackupLogInfo.Add(Info); } }
/////////////////////////////////////////////////////////////////////////////// // Star counting methods void TempMotifCounter::AddStarEdges(TVec<TIntPair>& combined, int u, int v, int key) { if (HasEdges(u, v)) { const TIntV& timestamps = temporal_data_[u].GetDat(v); for (int i = 0; i < timestamps.Len(); i++) { combined.Add(TIntPair(timestamps[i], key)); } } }
// Looks up every feature string in ISpace and appends the stored value to Ids.
// Features that are not present in ISpace are reported on stdout and skipped.
void TStrFeatureSpace::GetIds(const TStrV& Features, TVec<TStrFSSize>& Ids) const {
  const int NumFeatures = Features.Len();
  for (int FtrN = 0; FtrN < NumFeatures; FtrN++) {
    const int KeyId = ISpace.GetKeyId(Features[FtrN]);
    if (KeyId != -1) {
      Ids.Add(ISpace[KeyId]);
    } else {
      printf("Warning: unknown token: %s\n", Features[FtrN].CStr());
    }
  }
}
// Returns those candidates of Datum (as reported by GetCandidates) for which
// IsNear(Datum, candidate) holds, in candidate order.
TVec<TFltV> TLSHash::GetNearPoints(TFltV Datum) {
  TVec<TFltV> Candidates = GetCandidates(Datum);
  TVec<TFltV> NearPoints;
  const int NumCandidates = Candidates.Len();
  for (int CandN = 0; CandN < NumCandidates; CandN++) {
    if (!IsNear(Datum, Candidates[CandN])) { continue; }
    NearPoints.Add(Candidates[CandN]);
  }
  return NearPoints;
}
/// Splits ChA in place into sentences and stores a pointer to the start of each one in
/// SentenceV. A sentence ends at '.', '!' or '?' when the next character is not
/// alphanumeric. The buffer of ChA is modified: terminators are overwritten with 0,
/// so every pointer in SentenceV points into ChA's own buffer.
/// Returns the number of sentences; 0 when ChA holds only whitespace.
int TStrUtil::SplitSentences(TChA& ChA, TVec<char *>& SentenceV) {
  SentenceV.Clr();
  const char *Beg = ChA.CStr();
  const char *End = Beg + ChA.Len();
  char *Cur = (char *) Beg;
  // skip leading whitespace; nothing left means there is no sentence at all
  while (*Cur && TCh::IsWs(*Cur)) { Cur++; }
  if (! *Cur) { return 0; }
  SentenceV.Add(Cur);
  for (; Cur < End; Cur++) {
    const bool IsEndMark = (*Cur == '.' || *Cur == '!' || *Cur == '?');
    if (! (IsEndMark && ! TCh::IsAlNum(*(Cur+1)))) { continue; }
    // end of sentence
    if (*(Cur+1) == '"') { *Cur = '"'; Cur++; }  // blah." --> blah"
    if (Cur >= End) { continue; }
    *Cur = 0;
    Cur++;
    // skip trailing non-alpha-num chars (walk backwards, stop at a quote)
    char *Back = Cur - 1;
    while (Back > Beg && *Back != '"' && ! TCh::IsAlNum(*Back)) {
      *Back = 0;
      Back--;
    }
    // sentence starts with AlNum or "AlNum
    while (Cur < End && ! (TCh::IsAlNum(*Cur) || (*Cur == '"' && TCh::IsAlNum(*(Cur+1))))) {
      Cur++;
    }
    if (Cur < End) { SentenceV.Add(Cur); }
  }
  return SentenceV.Len();
}
// Replaces MainV with the items found in both MainV and JoinV, using a two-pointer walk.
// A matching pair (neither less nor greater) yields one item with the first item's Key
// and the sum of both Dat fields; unmatched items are dropped.
// NOTE(review): the walk presumably expects both inputs to be sorted - confirm with callers.
void TIndex::TQmGixSumMerger<TQmGixItem>::Intrs(TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV) const {
  TVec<TQmGixItem> CommonV;
  const int MainLen = MainV.Len();
  const int JoinLen = JoinV.Len();
  int MainN = 0;
  int JoinN = 0;
  while (MainN < MainLen && JoinN < JoinLen) {
    const TQmGixItem& MainItem = MainV.GetVal(MainN);
    const TQmGixItem& JoinItem = JoinV.GetVal(JoinN);
    if (MainItem < JoinItem) {
      MainN++;
    } else if (MainItem > JoinItem) {
      JoinN++;
    } else {
      CommonV.Add(TQmGixItem(MainItem.Key, MainItem.Dat + JoinItem.Dat));
      MainN++;
      JoinN++;
    }
  }
  MainV = CommonV;
}
/// Splits ChA in place into lines and stores a pointer to the start of each line in LineV.
/// Every '\n' (and a '\r' directly before it) is overwritten with 0, so the pointers in
/// LineV are null-terminated strings that point into ChA's own buffer.
/// When SkipEmpty is false every line is reported, including the (possibly empty) text
/// after the last '\n'. When SkipEmpty is true, empty lines are not reported.
/// Returns the number of entries in LineV.
int TStrUtil::SplitLines(TChA& ChA, TVec<char *>& LineV, const bool& SkipEmpty) {
  LineV.Clr(false);
  char *Beg = (char *) ChA.CStr();
  LineV.Add(Beg);
  for (char *c = Beg; *c; c++) {
    if (*c != '\n') { continue; }
    if (c > Beg && *(c-1) == '\r') { *(c-1) = 0; } // \r\n
    *c = 0;
    // Drop the line that just ended if it is empty, then always start the next line.
    // (Previously the next line was only recorded when the current one had content,
    // so the line following an empty line was lost, and '\r' counted as content.)
    if (SkipEmpty && ! LineV.Empty() && strlen(LineV.Last()) == 0) { LineV.DelLast(); }
    LineV.Add(c+1);
  }
  // the text after the last '\n' may be empty as well
  if (SkipEmpty && ! LineV.Empty() && strlen(LineV.Last()) == 0) { LineV.DelLast(); }
  return LineV.Len();
}
// Returns those pairs reported by GetAllCandidatePairs() whose two members satisfy
// IsNear(), in candidate order.
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllNearPairs() {
  TVec<TPair<TFltV, TFltV> > CandidatePairs = GetAllCandidatePairs();
  TVec<TPair<TFltV, TFltV> > NearPairs;
  const int NumPairs = CandidatePairs.Len();
  for (int PairN = 0; PairN < NumPairs; PairN++) {
    TPair<TFltV, TFltV>& Pair = CandidatePairs[PairN];
    if (!IsNear(Pair.GetVal1(), Pair.GetVal2())) { continue; }
    NearPairs.Add(Pair);
  }
  return NearPairs;
}