// Verifies TStr::ToLc(): after the call a mixed-case string compares equal to
// its lower-case spelling, and an empty string still compares equal to a
// default-constructed TStr.
TEST(TStr, ToLc) {
  // Expected values.
  TStr Lowercase("abcd");
  TStr Empty2;

  // Inputs that get lower-cased.
  TStr Mixedcase("AbCd");
  TStr Empty("");

  Empty.ToLc();
  Mixedcase.ToLc();

  EXPECT_EQ(Mixedcase, Lowercase);
  EXPECT_EQ(Empty, Empty2);
}
// Breaks a free-form author list into individual names and standardizes each
// one with GetStdName().
//   AuthorNames: raw author list (taken by value; modified locally only).
//   StdNameV:    output; cleared, then filled with every non-empty
//                standardized name in the order the names were split out.
void TStrUtil::GetStdNameV(TStr AuthorNames, TStrV& StdNameV) {
  AuthorNames.ChangeChAll('\n', ' ');
  AuthorNames.ToLc();

  // First cut: split on the word " and ".
  TStrV CurV, NextV, PartV;
  AuthorNames.SplitOnStr(" and ", CurV);

  // Then re-split every piece on each separator in turn: ',', '&', ',', ';'.
  const char SplitChs[] = {',', '&', ',', ';'};
  const int SplitChCnt = int(sizeof(SplitChs) / sizeof(SplitChs[0]));
  for (int SplitN = 0; SplitN < SplitChCnt; SplitN++) {
    NextV.Clr();
    for (int PieceN = 0; PieceN < CurV.Len(); PieceN++) {
      CurV[PieceN].SplitOnAllCh(SplitChs[SplitN], PartV);
      NextV.AddV(PartV);
    }
    CurV = NextV;
  }

  // Standardize each candidate; drop the ones GetStdName() rejects (empty result).
  StdNameV.Clr();
  for (int AuthN = 0; AuthN < CurV.Len(); AuthN++) {
    TStr StdName = GetStdName(CurV[AuthN]);
    if (StdName.Empty()) { continue; }
    StdNameV.Add(StdName);
  }
}
// <last_name>_<first name innitial> TStr TStrUtil::GetStdName(TStr AuthorName) { TStr StdName; AuthorName.ToLc(); AuthorName.ChangeChAll('\n', ' '); AuthorName.ChangeChAll('.', ' '); // if there is a number in the name, remove it and everything after it int i, pos = 0; while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) { pos++; } if (pos < AuthorName.Len()) { AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc(); } if (AuthorName.Empty()) { return TStr::GetNullStr(); } // replace everything after '(' int b = AuthorName.SearchCh('('); if (b != -1) { AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc(); } // skip if contains ')' if (AuthorName .SearchCh(')')!=-1) { return TStr::GetNullStr(); } // skip if it is not a name if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1 || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) { return TStr::GetNullStr(); } // remove all non-letters (latex tags, ...) TChA NewName; for (i = 0; i < AuthorName.Len(); i++) { const char Ch = AuthorName[i]; if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') { NewName += Ch; } } StdName = NewName; StdName.ToTrunc(); TStrV AuthNmV; StdName.SplitOnWs(AuthNmV); // too short -- not a name if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast(); if (AuthNmV.Len() < 2) return TStr::GetNullStr(); const TStr LastNm = AuthNmV.Last(); if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr(); IAssert(isalpha(AuthNmV[0][0])); return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]); }