// Loads a translation corpus from the AC XML file FNm and turns it into an
// alignment pair. The middle part of the file name must consist of three
// '-'-separated pieces; the second and third name the two languages.
// When MxSents is not -1 and the corpus holds more than MxSents sentences,
// a subset of MxSents sentences is picked with a fixed-seed shuffle.
PAlignPair TAlignPair::LoadAcXml(const TStr& FNm, const int& MxSents) {
    printf("Loading %s ...\n", FNm.CStr());
    // the language names are encoded in the file name
    TStr FMid = FNm.GetFMid();
    TStrV NmPartV; FMid.SplitOnAllCh('-', NmPartV);
    IAssertR(NmPartV.Len() == 3, "Bad file name: " + FMid);
    // empty alignment pair for the two languages
    PAlignPair Pair = TAlignPair::New(NmPartV[1], NmPartV[2]);
    // parse the XML (the loader is given four times the requested limit)
    PTransCorpus Corpus = TTransCorpus::LoadAC(FNm, MxSents * 4);
    // enumerate all sentence ids, then optionally sample from them
    const int CorpusSents = Corpus->GetSentences();
    TIntV PickedIdV(CorpusSents, 0);
    for (int Id = 0; Id < CorpusSents; Id++) {
        PickedIdV.Add(Id);
    }
    const bool LimitP = (MxSents != -1);
    if (LimitP && CorpusSents > MxSents) {
        // constant seed keeps the selection reproducible
        TRnd Rnd(1);
        PickedIdV.Shuffle(Rnd);
        PickedIdV.Trunc(MxSents);
    }
    // copy the selected sentence pairs into the alignment pair
    const int Picked = PickedIdV.Len();
    for (int PickN = 0; PickN < Picked; PickN++) {
        const int SentId = PickedIdV[PickN];
        const TStr& OrgSent = Corpus->GetOrgStr(SentId);
        // only the first reference translation is used
        const TStr& RefSent = Corpus->GetRefTransStrV(SentId)[0];
        Pair->AddSent(OrgSent, RefSent);
    }
    // finish the alignment pair
    Pair->Def();
    return Pair;
}
void TGgSchRef::GetAuthNmVPubStr( const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){ // split input string into two parts TStr AuthNmVStr; TStr PubStr; AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr); // author-names string AuthNmVStr.SplitOnAllCh(',', AuthNmV, true); for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){ AuthNmV[AuthN].ToTrunc(); } if ((!AuthNmV.Empty())&& ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){ AuthNmV.DelLast(); } // publication-name & publication-year string TStr OriginStr; TStr LinkStr; PubStr.SplitOnStr(OriginStr, " - ", LinkStr); OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr); PubNm.ToTrunc(); PubYearStr.ToTrunc(); if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){ PubYearStr=PubYearStr.GetSubStr(0, 3); } else if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){ PubYearStr=PubNm.GetSubStr(0, 3); PubNm=""; } else { PubYearStr=""; } }
// Builds an ontology from a plain-text vocabulary file: one language ("EN"),
// one term type ("Term") and one term per line of FNm. Term ids are the
// zero-based line positions produced by splitting the file on '\n'.
PLwOnto TLwOnto::LoadVoc(const TStr& FNm) {
  PLwOnto Onto = TLwOnto::New();
  // language object
  const int LangId = Onto->GetLangBs()->AddLang("EN", "English");
  // single term-type shared by all terms
  PLwTermType TermType = TLwTermType::New(0, "Term", LangId);
  Onto->GetTermTypeBs()->AddTermType(TermType);

  printf("Creating terms ...\n");
  // read the file, drop carriage returns, split into lines
  TStr VocStr = TStr::LoadTxt(FNm);
  VocStr.DelChAll('\r');
  TStrV LineV; VocStr.SplitOnAllCh('\n', LineV);
  // one term per line
  const int Terms = LineV.Len();
  for (int TermId = 0; TermId < Terms; TermId++) {
    // progress report every 1000 terms
    if (TermId % 1000 == 0) { printf("%d/%d\r", TermId, Terms); }
    PLwTerm Term = TLwTerm::New(
      TermId, LineV[TermId], LangId, TermType->GetTermTypeId());
    Onto->GetTermBs()->AddTerm(Term);
  }
  printf("Done. (%d)\n", Onto->GetTermBs()->GetTerms());

  return Onto;
}
// Returns FPath as an absolute path with "." and ".." components resolved.
// Every remaining component is emitted with a trailing '/'.
// Raises (EAssertR) when ".." would climb above the first component or when
// no component is left after resolution.
TStr TNodeJsFPath::GetCanonicalPath(const TStr& FPath) {
    // normalized absolute form of the input
    TStr AbsFPath = TStr::GetNrAbsFPath(FPath);
    TStrV PartV; AbsFPath.SplitOnAllCh('/', PartV);
    // resolve "." and ".." with a stack of surviving components
    TSStack<TStr> PartS;
    const int Parts = PartV.Len();
    for (int PartN = 0; PartN < Parts; ++PartN) {
        const TStr Part = PartV.GetVal(PartN);
        if (Part == ".") {
            // current directory: nothing to do
            continue;
        }
        if (Part == "..") {
            // parent directory: discard the last kept component
            EAssertR(!PartS.Empty(), "Stack empty");
            PartS.Pop();
            continue;
        }
        PartS.Push(Part + "/");
    }
    EAssertR(!PartS.Empty(), "Stack empty");
    // start with whatever precedes the first '/' (drive letter on Windows,
    // nothing on Unix) followed by the slash itself
    TChA CanonFPath = AbsFPath.LeftOf('/');
    CanonFPath += '/';
    // append the components, walking the stack from its highest index to 0
    int PartN = PartS.Len() - 1;
    while (PartN >= 0) {
        CanonFPath += PartS[PartN];
        PartN--;
    }
    return CanonFPath;
}
///////////////////////////////////////////////// // Geographical-IP void TGeoIpBs::GetCountryNm(const TStr& IpNumStr, TStr& CountrySNm, TStr& CountryLNm){ // prepare country-names CountrySNm="--"; CountryLNm="Unknown"; // split ip-num to sub-number-strings TStrV IpSubNumStrV; IpNumStr.SplitOnAllCh('.', IpSubNumStrV, false); // convert sub-number-strings to sub-numbers and ip-number int IpSubNum0, IpSubNum1, IpSubNum2, IpSubNum3; uint IpNum; if ( IpSubNumStrV[0].IsInt(true, 0, 255, IpSubNum0)&& IpSubNumStrV[1].IsInt(true, 0, 255, IpSubNum1)&& IpSubNumStrV[2].IsInt(true, 0, 255, IpSubNum2)&& IpSubNumStrV[3].IsInt(true, 0, 255, IpSubNum3)){ IpNum=16777216*IpSubNum0+65536*IpSubNum1+256*IpSubNum2+IpSubNum3; } else { return; } // get country-id from ip-number int CountryId=-1; int IpNumN; CountryMnIpNumV.SearchBin(IpNum+1, IpNumN); if (IpNumN>0){ uint MnIpNum=CountryMnIpNumV[IpNumN-1]; uint MxIpNum=MnIpNumToMxIpNumCountryIdPrH.GetDat(MnIpNum).Val1; if ((MnIpNum<=IpNum)&&(IpNum<=MxIpNum)){ CountryId=MnIpNumToMxIpNumCountryIdPrH.GetDat(MnIpNum).Val2; } } // get country-names if (CountryId!=-1){ CountrySNm=CountrySNmToLNmH.GetKey(CountryId); CountryLNm=CountrySNmToLNmH[CountryId]; } }
void TFtrGenSparseNumeric::Update(const TStr& Str) { TStrV EltV; Str.SplitOnAllCh(';', EltV); for (int EltN = 0; EltN < EltV.Len(); EltN++) { int Id; TStr Val; Split(EltV[EltN], Id, Val); MxId = TInt::GetMx(Id, MxId); FtrGen->Update(Val); } }
void TFtrGenMultiNom::Add(const TStr& Str, TIntFltKdV& SpV, int& Offset) const { TStrV EltV; Str.SplitOnAllCh(';', EltV); for (int EltN = 0; EltN < EltV.Len(); EltN++) { int TmpOffset = Offset; FtrGen->Add(EltV[EltN], SpV, TmpOffset); } Offset += GetVals(); }
void TFtrGenMultiNom::GetValV(const TStr& Str, TStrV& ValV) const { TStrV EltV; Str.SplitOnAllCh(';', EltV); ValV.Clr(); for (int EltN = 0; EltN < EltV.Len(); EltN++) { const TStr& Val = EltV[EltN]; TStrV SubValV; FtrGen->GetValV(Val, SubValV); ValV.AddV(SubValV); } }
// Constructs the environment from a single argument string: the string is
// split on spaces into ArgV. MnArgs starts at 1 and SilentP at false.
TEnv::TEnv(const TStr& _ArgStr, const PNotify& _Notify):
  ArgV(),
  HdStr(),
  MnArgs(1),
  SilentP(false),
  Notify(_Notify) {
  // one argument per space-separated token
  _ArgStr.SplitOnAllCh(' ', ArgV);
}
void TSwSet::LoadFromFile(const TStr& FNm) { TStr FileStr = TStr::LoadTxt(FNm); FileStr.DelChAll('\r'); TStrV WordV; FileStr.SplitOnAllCh('\n', WordV); for (int WordN = 0; WordN < WordV.Len(); WordN++) { const TStr& WordStr = WordV[WordN]; if (!IsIn(WordStr)) { AddWord(WordStr); } } }
// Reads nodes and cascades from a text stream.
// The first section holds one "<node-id>,<node-name>" line per node and ends
// at the first empty line (or at end of input). Every line read after that is
// passed to AddCasc together with Model and alpha.
void TNetInfBs::LoadCascadesTxt(TSIn& SIn, const int& Model, const double& alpha) {
  TStr Ln;
  // node section
  for (SIn.GetNextLn(Ln); !SIn.Eof() && Ln != ""; SIn.GetNextLn(Ln)) {
    TStrV FieldV;
    Ln.SplitOnAllCh(',', FieldV);
    // field 0 is the numeric id, field 1 the name
    AddNodeNm(FieldV[0].GetInt(), TNodeInfo(FieldV[1], 0));
  }
  printf("All nodes read!\n");
  // cascade section
  while (!SIn.Eof()) {
    SIn.GetNextLn(Ln);
    AddCasc(Ln, Model, alpha);
  }
  printf("All cascades read!\n");
}
// Reads the ground-truth network from a text stream into a fresh graph.
// The first section lists nodes (node id in the first comma-separated field)
// and ends at the first empty line or end of input. Each following line is
// "<src-id>,<dst-id>,<alpha>": the edge is added to GroundTruth and the alpha
// value is stored in Alphas under the (src, dst) pair.
void TNetInfBs::LoadGroundTruthTxt(TSIn& SIn) {
  GroundTruth = TNGraph::New();
  TStr Ln;

  // add nodes
  for (SIn.GetNextLn(Ln); !SIn.Eof() && Ln != ""; SIn.GetNextLn(Ln)) {
    TStrV FieldV;
    Ln.SplitOnAllCh(',', FieldV);
    GroundTruth->AddNode(FieldV[0].GetInt());
  }

  // add edges
  while (!SIn.Eof()) {
    SIn.GetNextLn(Ln);
    TStrV FieldV;
    Ln.SplitOnAllCh(',', FieldV);
    const int SrcNId = FieldV[0].GetInt();
    const int DstNId = FieldV[1].GetInt();
    GroundTruth->AddEdge(SrcNId, DstNId);
    Alphas.AddDat(TIntPr(SrcNId, DstNId)) = FieldV[2].GetFlt();
  }

  printf("groundtruth nodes:%d edges:%d\n", GroundTruth->GetNodes(), GroundTruth->GetEdges());
}
void TFtrGenSparseNumeric::Add(const TStr& Str, TIntFltKdV& SpV, int& Offset) const { TStrV EltV; Str.SplitOnAllCh(';', EltV); TIntH UsedIdH; for (int EltN = 0; EltN < EltV.Len(); EltN++) { int Id; TStr Val; Split(EltV[EltN], Id, Val); EAssertR(!UsedIdH.IsKey(Id), "Field ID repeated in '" + Str + "'!"); int TmpOffset = Offset + Id; FtrGen->Add(Val, SpV, TmpOffset); UsedIdH.AddKey(Id); } Offset += GetVals(); }
void TGreedyAlg::addCascade(const TStr& cascadeStr) { TStrV NIdV; cascadeStr.SplitOnAllCh(';', NIdV); TCascade C; for (int i = 0; i < NIdV.Len(); i++) { TStr NId, Tm; NIdV[i].SplitOnCh(NId, ',', Tm); IAssert( IsNodeNm(NId.GetInt()) ); GetNodeInfo(NId.GetInt()).Vol = GetNodeInfo(NId.GetInt()).Vol + 1; C.Add(NId.GetInt(), Tm.GetFlt()); } C.Sort(); cascadeV.Add(C); }
// Reads nodes and cascades from a text stream.
// Node lines ("<node-id>,<node-name>") come first and end at the first empty
// line or end of input; every line read afterwards is handed to addCascade.
void TGreedyAlg::loadCascadesFromFile(TSIn& SIn) {
    TStr ln;
    // node section
    for (SIn.GetNextLn(ln); !SIn.Eof() && ln != ""; SIn.GetNextLn(ln)) {
        TStrV fieldV;
        ln.SplitOnAllCh(',', fieldV);
        addNodeNm(fieldV[0].GetInt(), TNodeInfo(fieldV[1], 0));
    }
    printf("All nodes read!\n");
    // cascade section
    while (!SIn.Eof()) {
        SIn.GetNextLn(ln);
        addCascade(ln);
    }
    printf("All cascades read!\n");
}
void TNetInfBs::AddCasc(const TStr& CascStr, const int& Model, const double& alpha) { TStrV NIdV; CascStr.SplitOnAllCh(',', NIdV); TCascade C(alpha, Model); for (int i = 0; i < NIdV.Len(); i+=2) { int NId; double Tm; NId = NIdV[i].GetInt(); Tm = NIdV[i+1].GetFlt(); GetNodeInfo(NId).Vol = GetNodeInfo(NId).Vol + 1; C.Add(NId, Tm); } C.Sort(); CascV.Add(C); }
///// Split on all occurrences of SplitCh, write to StrV, optionally don't create empy strings (default true) //void SplitOnAllCh(const char& SplitCh, TStrV& StrV, const bool& SkipEmpty = true) const; TEST(TStr, SplitOnAllCh) { TStr Str = "abcabca"; TStrV StrV; Str.SplitOnAllCh('a', StrV, true); EXPECT_EQ(2, StrV.Len()); EXPECT_EQ(TStr("bc"), StrV[0]); EXPECT_EQ(TStr("bc"), StrV[1]); Str.SplitOnAllCh('a', StrV, false); EXPECT_EQ(4, StrV.Len()); EXPECT_EQ(TStr(), StrV[0]); EXPECT_EQ(TStr("bc"), StrV[1]); EXPECT_EQ(TStr("bc"), StrV[2]); EXPECT_EQ(TStr(), StrV[3]); // edge cases Str = "a"; Str.SplitOnAllCh('a', StrV, true); EXPECT_EQ(0, StrV.Len()); Str.SplitOnAllCh('a', StrV, false); EXPECT_EQ(2, StrV.Len()); Str = "aa"; Str.SplitOnAllCh('a', StrV, true); EXPECT_EQ(0, StrV.Len()); Str.SplitOnAllCh('a', StrV, false); EXPECT_EQ(3, StrV.Len()); Str = ""; Str.SplitOnAllCh('a', StrV, true); EXPECT_EQ(0, StrV.Len()); Str.SplitOnAllCh('a', StrV, false); EXPECT_EQ(1, StrV.Len()); // non match Str = "abc"; Str.SplitOnAllCh('x', StrV, true); EXPECT_EQ(1, StrV.Len()); Str.SplitOnAllCh('x', StrV, false); EXPECT_EQ(1, StrV.Len()); }
// Returns the values of all command-line arguments that start with PrefixStr.
// The text after the prefix is split on ',' and all pieces are collected.
//   PrefixStr - argument prefix to look for
//   DfValV    - values returned when no matching argument yields a value
//   DNm       - descriptive name used in the printed messages
// When Env has no more than MnArgs arguments, only a usage line is printed
// (unless SilentP) and an empty vector is returned.
TStrV TEnv::GetIfArgPrefixStrV(const TStr& PrefixStr, TStrV& DfValV, const TStr& DNm) const {
  TStrV ArgValV;

  // 'usage' argument message
  if (Env.GetArgs() <= MnArgs) {
    if (!SilentP) {
      printf(" %s%s (default:", PrefixStr.CStr(), DNm.CStr());
      const int DfVals = DfValV.Len();
      for (int DfValN = 0; DfValN < DfVals; DfValN++) {
        if (DfValN > 0) { printf(", "); }
        printf("%s", DfValV[DfValN].CStr());
      }
      printf(")\n");
    }
    return ArgValV;
  }

  // gather the values of every argument carrying the prefix
  const int PrefixLen = PrefixStr.Len();
  TStrV PieceV;
  for (int ArgN = 0; ArgN < GetArgs(); ArgN++) {
    TStr ArgStr = GetArg(ArgN);
    if (ArgStr.GetSubStr(0, PrefixLen - 1) == PrefixStr) {
      TStr ValsStr = ArgStr.GetSubStr(PrefixLen, ArgStr.Len());
      ValsStr.SplitOnAllCh(',', PieceV);
      ArgValV.AddV(PieceV);
    }
  }
  // fall back to the defaults when nothing was given
  if (ArgValV.Empty()) { ArgValV = DfValV; }

  // argument & value message
  TChA MsgChA;
  MsgChA += DNm; MsgChA += " ("; MsgChA += PrefixStr; MsgChA += ")=";
  const int ArgVals = ArgValV.Len();
  for (int ArgValN = 0; ArgValN < ArgVals; ArgValN++) {
    if (ArgValN > 0) { MsgChA += ", "; }
    MsgChA += ArgValV[ArgValN];
  }
  if (!SilentP) { TNotify::OnStatus(Notify, MsgChA); }
  return ArgValV;
}
void TStrFeatureSpace::FromAddStr(const TStr& Serialized, TIntFltKdV& Vec, char Sep) { TStrV Toks; Serialized.SplitOnAllCh(Sep, Toks, true); Vec.Gen(Toks.Len()); for (int i = 0; i < Toks.Len(); i++) { TStr Key, Value; Toks[i].SplitOnCh(Key, ':', Value); int FeatureId = GetAddId(Key); double FeatureWgt; if (Value.IsFlt(FeatureWgt)) { Vec[i].Key = FeatureId; Vec[i].Dat = FeatureWgt; } else { EFailR((Value + TStr(" is not a valid floating point number.")).CStr()); } } Vec.Sort(); }
void TGeoIpBs::GetLocation(const TStr& IpNumStr, TStr& CountrySNm, TStr& CityNm, double& Latitude, double& Longitude){ // prepare unknown location info CountrySNm="--"; CityNm="Unknown"; Latitude=0; Longitude=0; // split ip-num to sub-number-strings TStrV IpSubNumStrV; IpNumStr.SplitOnAllCh('.', IpSubNumStrV, false); // convert sub-number-strings to sub-numbers and ip-number int IpSubNum0, IpSubNum1, IpSubNum2, IpSubNum3; uint IpNum; if ( IpSubNumStrV[0].IsInt(true, 0, 255, IpSubNum0)&& IpSubNumStrV[1].IsInt(true, 0, 255, IpSubNum1)&& IpSubNumStrV[2].IsInt(true, 0, 255, IpSubNum2)&& IpSubNumStrV[3].IsInt(true, 0, 255, IpSubNum3)){ IpNum=16777216*IpSubNum0+65536*IpSubNum1+256*IpSubNum2+IpSubNum3; } else { return; } // get location-id from ip-number int LocId=-1; int IpNumN; LocMnIpNumV.SearchBin(IpNum+1, IpNumN); if (IpNumN>0){ uint MnIpNum=LocMnIpNumV[IpNumN-1]; uint MxIpNum=MnIpNumToMxIpNumLocIdPrH.GetDat(MnIpNum).Val1; if ((MnIpNum<=IpNum)&&(IpNum<=MxIpNum)){ LocId=MnIpNumToMxIpNumLocIdPrH.GetDat(MnIpNum).Val2; } } // get location info if (LocId!=-1){ int CountrySNmId=LocIdToCountryIdCityIdLatitudeLongitudeQuH.GetDat(LocId).Val1; CountrySNm=GeoNmH.GetKey(CountrySNmId); int CityNmId=LocIdToCountryIdCityIdLatitudeLongitudeQuH.GetDat(LocId).Val2; CityNm=GeoNmH.GetKey(CityNmId); Latitude=LocIdToCountryIdCityIdLatitudeLongitudeQuH.GetDat(LocId).Val3; Longitude=LocIdToCountryIdCityIdLatitudeLongitudeQuH.GetDat(LocId).Val4; } }
void TStrFeatureSpace::FromStr(const TStr& Serialized, TIntFltKdV& Vec, char Sep) const { TStrV Toks; Serialized.SplitOnAllCh(Sep, Toks, true); Vec.Gen(Toks.Len(),0); for (int i = 0; i < Toks.Len(); i++) { TStr Key, Value; Toks[i].SplitOnCh(Key, ':', Value); TStrFSSize FeatureId; if (GetIfExistsId(Key, FeatureId)) { double FeatureWgt; if (Value.IsFlt(FeatureWgt)) { TIntFltKd& Kv = Vec[Vec.Add()]; Kv.Key = FeatureId; Kv.Dat = FeatureWgt; } else { EFailR((Value + TStr(" is not a valid floating point number.")).CStr()); } } } Vec.Sort(); }
// Draws the name of Point centred on (X, Y) and returns the rectangle that
// was used for the text.
//   PointFontSize    - base font size
//   PointNmFontScale - multiplied by the point weight and added to the base size
//   SelPointP        - selects the font colour for selected points
//   IsCatP           - not used by the current body
// Underscores in the name are shown as spaces and everything from "[[" on
// (plus the character right before it) is cut off. The name is wrapped with
// Gks->BreakTxt and drawn line by line, each line centred horizontally.
TFltRect TVizMapContext::PaintPointNm(PGks Gks, PVizMapPoint Point, const int& X,
        const int& Y, const int& PointFontSize, const int& PointNmFontScale,
        const bool& SelPointP, const bool& IsCatP) {

    // get and clean point name
    TStr Nm = Point->GetPointNm();
    Nm.ChangeChAll('_', ' ');
    if (Nm.IsStrIn("[[")) {
        const int MarkPos = Nm.SearchStr("[[");
        Nm = Nm.Left(MarkPos - 1);
    }
    // set font: colour depends on selection, size on the point weight
    TGksColor FontColor;
    if (SelPointP) { FontColor = ColorSelPointFont; }
    else { FontColor = ColorPointFont; }
    const int FontSize = PointFontSize + TFlt::Round(Point->GetWgt()*PointNmFontScale);
    Gks->SetFont(TGksFont::New("ARIAL", FontSize, FontColor));
    // refit it for the screen
    TStr WrappedNm = Gks->BreakTxt(Nm, " ", "", PointNmWidth, PointNmMxLines);
    // half extents of the whole text block
    const int HalfW = Gks->GetTxtWidth(WrappedNm) / 2;
    const int HalfH = Gks->GetTxtHeight(WrappedNm) / 2;
    // draw line by line, top to bottom
    const int LeftX = X - HalfW;
    int LineY = Y - HalfH;
    TStrV LineV; WrappedNm.SplitOnAllCh('\n', LineV);
    const int Lines = LineV.Len();
    for (int LineN = 0; LineN < Lines; LineN++) {
        const TStr& LineStr = LineV[LineN];
        const int HalfLineW = Gks->GetTxtWidth(LineStr) / 2;
        const int LineH = Gks->GetTxtHeight(LineStr);
        // shift the line so that it is centred inside the block
        Gks->PutTxt(LineStr, LeftX + (HalfW - HalfLineW), LineY);
        // consecutive lines overlap by three units
        LineY += LineH-3;
    }
    // rectangle of the text block; the bottom is pulled up by the
    // per-line overlap
    return TFltRect(X - HalfW, Y - HalfH,
        X + HalfW, Y + HalfH - LineV.Len()*3);
}
void TNetInfBs::GenerateGroundTruth(const int& TNetwork, const int& NNodes, const int& NEdges, const TStr& NetworkParams) { TKronMtx SeedMtx; TStr MtxNm; switch (TNetwork) { // 2-dimension kronecker network case 0: printf("Kronecker graph for Ground Truth\n"); SeedMtx = TKronMtx::GetMtx(NetworkParams.CStr()); // 0.5,0.5,0.5,0.5 printf("\n*** Seed matrix:\n"); SeedMtx.Dump(); GroundTruth = TKronMtx::GenFastKronecker(SeedMtx, (int)TMath::Log2(NNodes), NEdges, true, 0); break; // forest fire network case 1: printf("Forest Fire graph for Ground Truth\n"); TStrV NetworkParamsV; NetworkParams.SplitOnAllCh(';', NetworkParamsV); TFfGGen FF(true, // BurnExpFireP NetworkParamsV[0].GetInt(), // StartNNodes (1) NetworkParamsV[1].GetFlt(), // ForwBurnProb (0.2) NetworkParamsV[2].GetFlt(), // BackBurnProb (0.17) NetworkParamsV[3].GetInt(), // DecayProb (1) NetworkParamsV[4].GetInt(), // Take2AmbasPrb (0) NetworkParamsV[5].GetInt()); // OrphanPrb (0) FF.GenGraph(NNodes, false); GroundTruth = FF.GetGraph(); break; } }
// Splits s on the space character and returns the resulting tokens.
inline TStrV createTStrV(TStr s) {
	TStrV tokens;
	s.SplitOnAllCh(' ', tokens);
	return tokens;
}
void TSwSet::MultiAdd(const TStr& Str){ TStrV StrV; Str.SplitOnAllCh('.', StrV); for (int StrN=0; StrN<StrV.Len(); StrN++){Add(StrV[StrN]);} }
void TFtrGenMultiNom::Update(const TStr& Str) { TStrV EltV; Str.SplitOnAllCh(';', EltV); for (int EltN = 0; EltN < EltV.Len(); EltN++) { FtrGen->Update(EltV[EltN]); } }