void TGgSchRef::GetAuthNmVPubStr( const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){ // split input string into two parts TStr AuthNmVStr; TStr PubStr; AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr); // author-names string AuthNmVStr.SplitOnAllCh(',', AuthNmV, true); for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){ AuthNmV[AuthN].ToTrunc(); } if ((!AuthNmV.Empty())&& ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){ AuthNmV.DelLast(); } // publication-name & publication-year string TStr OriginStr; TStr LinkStr; PubStr.SplitOnStr(OriginStr, " - ", LinkStr); OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr); PubNm.ToTrunc(); PubYearStr.ToTrunc(); if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){ PubYearStr=PubYearStr.GetSubStr(0, 3); } else if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){ PubYearStr=PubNm.GetSubStr(0, 3); PubNm=""; } else { PubYearStr=""; } }
// <last_name>_<first name innitial> TStr TStrUtil::GetStdName(TStr AuthorName) { TStr StdName; AuthorName.ToLc(); AuthorName.ChangeChAll('\n', ' '); AuthorName.ChangeChAll('.', ' '); // if there is a number in the name, remove it and everything after it int i, pos = 0; while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) { pos++; } if (pos < AuthorName.Len()) { AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc(); } if (AuthorName.Empty()) { return TStr::GetNullStr(); } // replace everything after '(' int b = AuthorName.SearchCh('('); if (b != -1) { AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc(); } // skip if contains ')' if (AuthorName .SearchCh(')')!=-1) { return TStr::GetNullStr(); } // skip if it is not a name if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1 || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) { return TStr::GetNullStr(); } // remove all non-letters (latex tags, ...) TChA NewName; for (i = 0; i < AuthorName.Len(); i++) { const char Ch = AuthorName[i]; if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') { NewName += Ch; } } StdName = NewName; StdName.ToTrunc(); TStrV AuthNmV; StdName.SplitOnWs(AuthNmV); // too short -- not a name if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast(); if (AuthNmV.Len() < 2) return TStr::GetNullStr(); const TStr LastNm = AuthNmV.Last(); if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr(); IAssert(isalpha(AuthNmV[0][0])); return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]); }
// Exercises TStr::GetTrunc() and TStr::ToTrunc() on strings padded with
// whitespace on both sides, on one side only, and consisting of whitespace
// only. On each object GetTrunc() is checked before ToTrunc().
TEST(TStr, Trunc) {
  TStr PaddedBoth = " abcdef ";
  TStr BlankOnly = " ";
  TStr PaddedRight = "abcdef ";
  TStr PaddedLeft = " abcdef";

  // whitespace on both sides
  EXPECT_EQ(PaddedBoth.GetTrunc(), "abcdef");
  EXPECT_EQ(PaddedBoth.ToTrunc(), "abcdef");

  // nothing but whitespace: result equals a default-constructed string
  EXPECT_EQ(BlankOnly.GetTrunc(), TStr());
  EXPECT_EQ(BlankOnly.ToTrunc(), TStr());

  // whitespace on one side only
  EXPECT_EQ(PaddedRight.ToTrunc(), "abcdef");
  EXPECT_EQ(PaddedLeft.ToTrunc(), "abcdef");
}
// Builds a light-weight ontology from the ASFA thesaurus XML file.
//   FPath: directory that contains "asfa_xml_20060522.xml"
// Returns a new ontology holding one English language entry, the term-types
// "Descriptor"/"Non-descriptor", the link-types BT, NT, RT, UF and USE, one
// term per THESAURUS|CONCEPT element and one link per BT/NT/RT/UF/USE
// sub-tag of each concept. Progress is reported on stdout; loading asserts
// that the XML document parsed correctly.
PLwOnto TLwOnto::LoadAsfaVoc(const TStr& FPath){
  // normalize path
  TStr NrFPath=TStr::GetNrFPath(FPath);
  // create ontology
  PLwOnto LwOnto=TLwOnto::New();
  // create language object
  int EnLangId=LwOnto->GetLangBs()->AddLang("EN", "English");
  // create term-types
  {PLwTermType D_TermType=TLwTermType::New(0, "Descriptor", EnLangId);
  PLwTermType ND_TermType=TLwTermType::New(1, "Non-descriptor", EnLangId);
  LwOnto->GetTermTypeBs()->AddTermType(D_TermType);
  LwOnto->GetTermTypeBs()->AddTermType(ND_TermType);}
  // create link-types
  {PLwLinkType BT_LinkType=TLwLinkType::New(0, "BT", EnLangId, "Broader-Term");
  PLwLinkType NT_LinkType=TLwLinkType::New(1, "NT", EnLangId, "Narrower-Term");
  PLwLinkType RT_LinkType=TLwLinkType::New(2, "RT", EnLangId, "Related-Term");
  PLwLinkType UF_LinkType=TLwLinkType::New(3, "UF", EnLangId, "Used-For");
  PLwLinkType USE_LinkType=TLwLinkType::New(4, "USE", EnLangId, "Used-By");
  LwOnto->GetLinkTypeBs()->AddLinkType(BT_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(NT_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(RT_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(UF_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(USE_LinkType);}
  // load ontology file
  TStr AsfaOntoFNm=NrFPath+"asfa_xml_20060522.xml";
  printf("Loading '%s' ...", AsfaOntoFNm.CStr());
  PXmlDoc AsfaXmlDoc=TXmlDoc::LoadTxt(AsfaOntoFNm);
  IAssert(AsfaXmlDoc->IsOk());
  TXmlTokV ConceptXmlTokV;
  AsfaXmlDoc->GetTagTokV("THESAURUS|CONCEPT", ConceptXmlTokV);
  printf(" Done.\n");
  // create terms (first pass, so that the link pass can resolve every name)
  {printf("Creating terms ...");
  for (int ConceptN=0; ConceptN<ConceptXmlTokV.Len(); ConceptN++){
    PXmlTok ConceptXmlTok=ConceptXmlTokV[ConceptN];
    // term-name: a concept carries either a NON-DESCRIPTOR or a DESCRIPTOR tag
    TStr TermNm;
    if (ConceptXmlTok->IsSubTag("NON-DESCRIPTOR")){
      TermNm=ConceptXmlTok->GetTagTokStr("NON-DESCRIPTOR");}
    else if (ConceptXmlTok->IsSubTag("DESCRIPTOR")){
      TermNm=ConceptXmlTok->GetTagTokStr("DESCRIPTOR");}
    // term-type
    TStr TermTypeNm=ConceptXmlTok->GetTagTokStr("TYP");
    int TermTypeId=LwOnto->GetTermTypeBs()->GetTermTypeId(TermTypeNm, EnLangId);
    // description (optional scope note, flattened to a single line)
    TStr DescStr;
    if (ConceptXmlTok->IsSubTag("SN")){
      DescStr=ConceptXmlTok->GetTagTokStr("SN");
      DescStr.ChangeChAll('\r', ' ');
      DescStr.ChangeChAll('\n', ' ');
      // squeeze the double spaces left behind by the replacements above
      DescStr.ChangeStrAll("  ", " ");
      DescStr.ToTrunc();
    }
    // create term
    PLwTerm Term=TLwTerm::New(-1, TermNm, EnLangId, TermTypeId, DescStr);
    LwOnto->GetTermBs()->AddTermGetTermId(Term);
  }
  printf(" Done. (%d)\n", LwOnto->GetTermBs()->GetTerms());}
  // create links (second pass)
  {printf("Creating links ...");
  for (int ConceptN=0; ConceptN<ConceptXmlTokV.Len(); ConceptN++){
    PXmlTok ConceptXmlTok=ConceptXmlTokV[ConceptN];
    // source-term-name
    TStr TermNm1;
    if (ConceptXmlTok->IsSubTag("NON-DESCRIPTOR")){
      TermNm1=ConceptXmlTok->GetTagTokStr("NON-DESCRIPTOR");}
    else if (ConceptXmlTok->IsSubTag("DESCRIPTOR")){
      TermNm1=ConceptXmlTok->GetTagTokStr("DESCRIPTOR");}
    int TermId1=LwOnto->GetTermBs()->GetTermId(TermNm1, EnLangId);
    // links: every sub-tag whose name is a registered link-type
    for (int SubTokN=0; SubTokN<ConceptXmlTok->GetSubToks(); SubTokN++){
      PXmlTok SubTok=ConceptXmlTok->GetSubTok(SubTokN);
      if (SubTok->IsTag()){
        TStr LinkTypeNm=SubTok->GetTagNm();
        if (LwOnto->GetLinkTypeBs()->IsLinkType(LinkTypeNm, EnLangId)){
          // destination-term-name: read it from the sub-tag being visited;
          // a by-name lookup on the concept would resolve every repeated
          // tag (e.g. several NT entries) to the same sub-tag
          TStr TermNm2=SubTok->GetTokStr(false);
          int TermId2=LwOnto->GetTermBs()->GetTermId(TermNm2, EnLangId);
          int LinkTypeId=LwOnto->GetLinkTypeBs()->GetLinkTypeId(LinkTypeNm, EnLangId);
          LwOnto->GetLinkBs()->AddLink(TermId1, LinkTypeId, TermId2);
        }
      }
    }
  }
  printf(" Done. (%d)\n", LwOnto->GetLinkBs()->GetLinks());}
  // return ontology
  return LwOnto;
}