void TGgSchRef::GetAuthNmVPubStr( const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){ // split input string into two parts TStr AuthNmVStr; TStr PubStr; AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr); // author-names string AuthNmVStr.SplitOnAllCh(',', AuthNmV, true); for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){ AuthNmV[AuthN].ToTrunc(); } if ((!AuthNmV.Empty())&& ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){ AuthNmV.DelLast(); } // publication-name & publication-year string TStr OriginStr; TStr LinkStr; PubStr.SplitOnStr(OriginStr, " - ", LinkStr); OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr); PubNm.ToTrunc(); PubYearStr.ToTrunc(); if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){ PubYearStr=PubYearStr.GetSubStr(0, 3); } else if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){ PubYearStr=PubNm.GetSubStr(0, 3); PubNm=""; } else { PubYearStr=""; } }
/// Breaks a free-form author list into individual names and standardizes each.
/// @param AuthorNames  author list; newlines are replaced by spaces and the
///                     text is lower-cased before splitting
/// @param StdNameV     cleared, then filled with every non-empty result of
///                     GetStdName() in input order
void TStrUtil::GetStdNameV(TStr AuthorNames, TStrV& StdNameV) {
  AuthorNames.ChangeChAll('\n', ' ');
  AuthorNames.ToLc();

  // first cut: the word " and " between names
  TStrV PartV;
  AuthorNames.SplitOnStr(" and ", PartV);

  // then refine the pieces with each single-character delimiter in turn
  // (the comma appears twice, mirroring the original sequence of passes)
  const char SplitChA[] = {',', '&', ',', ';'};
  const int SplitChs = int(sizeof(SplitChA) / sizeof(SplitChA[0]));
  TStrV PieceV;
  for (int ChN = 0; ChN < SplitChs; ChN++) {
    TStrV NextV;
    for (int PartN = 0; PartN < PartV.Len(); PartN++) {
      PartV[PartN].SplitOnAllCh(SplitChA[ChN], PieceV);
      NextV.AddV(PieceV);
    }
    PartV = NextV;
  }

  // standardize every piece, keeping only non-empty results
  StdNameV.Clr();
  for (int PartN = 0; PartN < PartV.Len(); PartN++) {
    TStr StdName = GetStdName(PartV[PartN]);
    if (StdName.Empty()) { continue; }
    StdNameV.Add(StdName);
  }
}
void TCordisEuPart::ExtrEuPartV(const TStr& AllEuPartStr, TCordisEuPartV& EuPartV){ TStrV EuPartStrV; AllEuPartStr.SplitOnStr("Organisation Type:", EuPartStrV); for (int EuPartN=1; EuPartN<EuPartStrV.Len(); EuPartN++){ TStr EuPartStr=EuPartStrV[EuPartN]; TStr WcStr= "</span>*</td>" "*Organisation:</span>*<br>" "*</td>*"; TStrV StarStrV; if (EuPartStr.IsWcMatch(WcStr, StarStrV)){ PCordisEuPart EuPart=TCordisEuPart::New(); EuPart->DeptNm=ExtrDeptNm(EuPartStr); EuPart->OrgNm=TXmlLx::GetPlainStrFromXmlStr(StarStrV[2].GetTrunc()); EuPart->OrgTypeNm=StarStrV[0].GetTrunc(); EuPart->CountryNm=ExtrCountry(StarStrV[3].GetTrunc()); EuPart->CoordP=(EuPartN==1); printf(" Partner: '%s'/'%s'/'%s'/'%s'/'%s'\n", EuPart->DeptNm.CStr(), EuPart->OrgNm.CStr(), EuPart->OrgTypeNm.CStr(), EuPart->CountryNm.CStr(), TBool::GetStr(EuPart->CoordP).CStr()); EuPartV.Add(EuPart); } else { printf("Partner Fields Not Found!\n"); } } }
/// Builds a cluster record from one tab-separated text line.
/// Columns 0-13 are read in order: id, start, end, unique, size, peak count,
/// representative-string length, representative string, representative URL,
/// first, last, peak, archived flag, discard state.
/// NOTE(review): the column count is not checked here; the line is assumed to
/// carry at least 14 fields - confirm against the writer of this format.
DCluster::DCluster(TStr LineInput) {
  // column positions within the tab-separated line
  enum {
    FldId = 0, FldStart, FldEnd, FldUnique, FldSize, FldNumPeaks,
    FldRepStrLen, FldRepStr, FldRepURL, FldFirst, FldLast, FldPeak,
    FldArchived, FldDiscardState
  };

  TStrV FieldV;
  LineInput.SplitOnStr("\t", FieldV);

  // identifier and start/end timestamps
  Id = TInt(FieldV[FldId].GetInt());
  Start = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldStart]);
  End = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldEnd]);

  // day span, counting both the start day and the end day
  uint FirstDay = Start.GetInUnits(tmuDay);
  uint FinalDay = End.GetInUnits(tmuDay);
  DiffDay = FinalDay - FirstDay + 1;

  // counters
  Unique = TInt(FieldV[FldUnique].GetInt());
  Size = TInt(FieldV[FldSize].GetInt());
  NumPeaks = TInt(FieldV[FldNumPeaks].GetInt());
  RepStrLen = TInt(FieldV[FldRepStrLen].GetInt());

  // representative text and URL
  RepStr = FieldV[FldRepStr];
  RepURL = FieldV[FldRepURL];

  // first / last / peak timestamps
  First = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldFirst]);
  Last = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldLast]);
  Peak = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldPeak]);

  // archived flag: starts out true; IsBool() may overwrite it
  // (presumably only when the field parses as a boolean - verify)
  bool ArchivedFlag = true;
  FieldV[FldArchived].IsBool(ArchivedFlag);
  Archived = TBool(ArchivedFlag);

  DiscardState = TInt(FieldV[FldDiscardState].GetInt());
}
///// Splits on the first occurrence of the target string ///// if the target string is not found the whole string is returned as the left side //void SplitOnStr(TStr& LStr, const TStr& SplitStr, TStr& RStr) const; TEST(TStr, SplitOnStr) { const TStr Str = "abcd"; const TStr EmptyStr = ""; TStr LStr, RStr; // left empty Str.SplitOnStr(LStr, "ab", RStr); EXPECT_EQ(LStr, ""); EXPECT_EQ(RStr, "cd"); // both nonempty Str.SplitOnStr(LStr, "bc", RStr); EXPECT_EQ(LStr, "a"); EXPECT_EQ(RStr, "d"); // right empty Str.SplitOnStr(LStr, "cd", RStr); EXPECT_EQ(LStr, "ab"); EXPECT_EQ(RStr, ""); // both empty Str.SplitOnStr(LStr, "abcd", RStr); EXPECT_EQ(LStr, ""); EXPECT_EQ(RStr, ""); // no-match Str.SplitOnStr(LStr, "fg", RStr); EXPECT_EQ(LStr, Str); EXPECT_EQ(RStr, ""); Str.SplitOnStr(LStr, "abcde", RStr); EXPECT_EQ(LStr, Str); EXPECT_EQ(RStr, ""); Str.SplitOnStr(LStr, "", RStr); EXPECT_EQ(LStr, Str); EXPECT_EQ(RStr, ""); // empty EmptyStr.SplitOnStr(LStr, "aa", RStr); EXPECT_EQ(LStr, ""); EXPECT_EQ(RStr, ""); EmptyStr.SplitOnStr(LStr, "", RStr); EXPECT_EQ(LStr, ""); EXPECT_EQ(RStr, ""); }
///////////////////////////////////////////////// // EuPartner TStr TCordisEuPart::ExtrCountry(const TStr& AddrStr){ TStr CountryStr; TStrV LnV; AddrStr.SplitOnStr("<br>", LnV); if (LnV.Len()>0){ CountryStr=LnV.Last(); if (CountryStr.Empty()&&(LnV.Len()>1)){ CountryStr=LnV[LnV.Len()-2];} CountryStr.DelChAll('\r'); CountryStr.DelChAll('\n'); } if (CountryStr.Empty()){ printf("Country Field Not Found!\n");} return CountryStr; }
/// Returns the host portion of an address: the text before the first '/'
/// once a leading "scheme//" part has been removed.
///
/// The scheme is stripped only when the address contains "http" AND a "//"
/// separator. Previously the presence of "http" alone triggered the strip;
/// for an address such as "httpbin.org/get" there is no "//", so SplitOnStr
/// put the whole address on the left side, left the right side empty, and
/// the function returned an empty host. Such addresses now take the plain
/// '/'-split path. Addresses that do contain "//" behave as before.
///
/// @param fulladdress  address with or without an "http...//" prefix
/// @return text up to (not including) the first '/' after the optional
///         scheme (assumes SplitOnCh yields the whole string on the left
///         when no '/' is present - TODO confirm)
inline TStr getWebsite(TStr fulladdress) {
  TStr host, rest;
  const bool hasScheme =
      (fulladdress.SearchStr(TStr("http"), 0) >= 0) &&
      (fulladdress.SearchStr(TStr("//"), 0) >= 0);
  if (hasScheme) {
    // drop everything up to and including the first "//"
    TStr scheme, afterScheme;
    fulladdress.SplitOnStr(scheme, TStr("//"), afterScheme);
    afterScheme.SplitOnCh(host, '/', rest);
  } else {
    // no usable scheme separator: the host starts at the first character
    fulladdress.SplitOnCh(host, '/', rest);
  }
  return host;
}
/// Builds a quote record from one tab-separated text line.
/// Columns 1-9 are read in order: id, size, peak count, string length,
/// quote text, representative URL, first, last, peak. Column 0 is not read.
/// NOTE(review): the column count is not checked here; the line is assumed to
/// carry at least 10 fields - confirm against the writer of this format.
DQuote::DQuote(TStr LineInput) {
  // column positions within the tab-separated line (column 0 is unused here)
  enum {
    FldId = 1, FldSize, FldNumPeaks, FldStrLen,
    FldStr, FldRepURL, FldFirst, FldLast, FldPeak
  };

  TStrV FieldV;
  LineInput.SplitOnStr("\t", FieldV);

  // numeric attributes
  Id = TInt(FieldV[FldId].GetInt());
  Size = TInt(FieldV[FldSize].GetInt());
  NumPeaks = TInt(FieldV[FldNumPeaks].GetInt());
  StrLen = TInt(FieldV[FldStrLen].GetInt());

  // quote text and representative URL
  Str = FieldV[FldStr];
  RepURL = FieldV[FldRepURL];

  // first / last / peak timestamps
  First = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldFirst]);
  Last = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldLast]);
  Peak = TSecTm::GetDtTmFromYmdHmsStr(FieldV[FldPeak]);
}
///// Split on all the occurrences of SplitStr //void SplitOnStr(const TStr& SplitStr, TStrV& StrV) const; TEST(TStr, SplitOnStr_VectorOutput) { TStr Str = "xybcxybcxy"; TStrV StrV; Str.SplitOnStr("xy", StrV); /* for (int i = 0; i < StrV.Len(); i++) { printf("%s\n", StrV[i]); }*/ //EXPECT_EQ(2, StrV.Len()); //EXPECT_EQ(TStr("bc"), StrV[0]); //EXPECT_EQ(TStr("bc"), StrV[1]); // //// edge cases //Str = "a"; //Str.SplitOnAllCh('a', StrV, true); //EXPECT_EQ(0, StrV.Len()); //Str.SplitOnAllCh('a', StrV, false); //EXPECT_EQ(2, StrV.Len()); //Str = "aa"; //Str.SplitOnAllCh('a', StrV, true); //EXPECT_EQ(0, StrV.Len()); //Str.SplitOnAllCh('a', StrV, false); //EXPECT_EQ(3, StrV.Len()); //Str = ""; //Str.SplitOnAllCh('a', StrV, true); //EXPECT_EQ(0, StrV.Len()); //Str.SplitOnAllCh('a', StrV, false); //EXPECT_EQ(1, StrV.Len()); //// non match //Str = "abc"; //Str.SplitOnAllCh('x', StrV, true); //EXPECT_EQ(1, StrV.Len()); //Str.SplitOnAllCh('x', StrV, false); //EXPECT_EQ(1, StrV.Len()); }