Пример #1
0
void TGgSchRef::GetAuthNmVPubStr(
 const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){
  // split input string into two parts
  TStr AuthNmVStr; TStr PubStr;
  AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr);
  // author-names string
  AuthNmVStr.SplitOnAllCh(',', AuthNmV, true);
  for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){
    AuthNmV[AuthN].ToTrunc();
  }
  if ((!AuthNmV.Empty())&&
   ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){
    AuthNmV.DelLast();
  }
  // publication-name & publication-year string
  TStr OriginStr; TStr LinkStr;
  PubStr.SplitOnStr(OriginStr, " - ", LinkStr);
  OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr);
  PubNm.ToTrunc(); PubYearStr.ToTrunc();
  if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubYearStr.GetSubStr(0, 3);
  } else
  if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubNm.GetSubStr(0, 3); PubNm="";
  } else {
    PubYearStr="";
  }
}
Пример #2
0
// Builds a standardized author key of the form <last_name>_<first_name_initial>.
//
// The input is lower-cased, newlines and periods are turned into spaces, and
// everything from the first digit or '#' onwards, as well as everything from
// the first '(' onwards, is cut off. Characters other than letters,
// whitespace and '-' are then removed and the rest is split on whitespace.
//
// Returns TStr::GetNullStr() whenever the input does not look like a personal
// name: it is empty after cleaning, contains ')', contains one of the words
// "figures", "macros", "univ" or "institute", has fewer than two tokens
// (a trailing "jr" is not counted), has a one-character last name, or its
// first or last token does not start with a letter.
TStr TStrUtil::GetStdName(TStr AuthorName) {
    AuthorName.ToLc();
    AuthorName.ChangeChAll('\n', ' ');
    AuthorName.ChangeChAll('.', ' ');
    // if there is a number (or '#') in the name, remove it and everything after it
    int pos = 0;
    while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) {
        pos++;
    }
    if (pos < AuthorName.Len()) {
        AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc();
    }
    if (AuthorName.Empty()) {
        return TStr::GetNullStr();
    }

    // remove everything from '(' onwards
    const int b = AuthorName.SearchCh('(');
    if (b != -1) {
        AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc();
    }
    // skip if contains ')'
    if (AuthorName.SearchCh(')')!=-1) {
        return TStr::GetNullStr();
    }
    // skip if it is not a name
    if (AuthorName.SearchStr("figures")!=-1 || AuthorName.SearchStr("macros")!=-1
            || AuthorName.SearchStr("univ")!=-1 || AuthorName.SearchStr("institute")!=-1) {
        return TStr::GetNullStr();
    }
    // remove all non-letters (latex tags, ...); whitespace and '-' are kept
    TChA NewName;
    for (int i = 0; i < AuthorName.Len(); i++) {
        const char Ch = AuthorName[i];
        if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') {
            NewName += Ch;
        }
    }
    TStr StdName = NewName;
    StdName.ToTrunc();
    TStrV AuthNmV;
    StdName.SplitOnWs(AuthNmV);
    // a trailing "jr" suffix is not part of the name
    if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast();
    // too short -- not a name
    if (AuthNmV.Len() < 2) return TStr::GetNullStr();

    const TStr LastNm = AuthNmV.Last();
    if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr();

    // '-' survives the filter above, so the first token may start with it
    // (e.g. "-j smith"). Reject such input like every other malformed name
    // instead of tripping an assertion.
    const TStr FirstNm = AuthNmV[0];
    if (! TCh::IsAlpha(FirstNm[0])) return TStr::GetNullStr();

    return TStr::Fmt("%s_%c", LastNm.CStr(), FirstNm[0]);
}
Пример #3
0
// Checks the results of TStr::GetTrunc() and TStr::ToTrunc() on strings padded
// with spaces on both sides, on one side only, and on an all-space string.
TEST(TStr, Trunc) {
	TStr PaddedBoth = "   abcdef    ";
	TStr OnlySpaces = "    ";
	TStr PaddedRight = "abcdef    ";
	TStr PaddedLeft = "    abcdef";

	// GetTrunc() is queried before ToTrunc() is applied to the same object
	EXPECT_EQ(PaddedBoth.GetTrunc(), "abcdef");
	EXPECT_EQ(OnlySpaces.GetTrunc(), TStr());

	// ToTrunc() on the same two strings gives the same results
	EXPECT_EQ(PaddedBoth.ToTrunc(), "abcdef");
	EXPECT_EQ(OnlySpaces.ToTrunc(), TStr());

	// padding on one side only
	EXPECT_EQ(PaddedRight.ToTrunc(), "abcdef");
	EXPECT_EQ(PaddedLeft.ToTrunc(), "abcdef");
}
Пример #4
0
PLwOnto TLwOnto::LoadAsfaVoc(const TStr& FPath){
  // normalize path
  TStr NrFPath=TStr::GetNrFPath(FPath);
  // create ontology
  PLwOnto LwOnto=TLwOnto::New();

  // create language object
  int EnLangId=LwOnto->GetLangBs()->AddLang("EN", "English");

  // create term-types
  {PLwTermType D_TermType=TLwTermType::New(0, "Descriptor", EnLangId);
  PLwTermType ND_TermType=TLwTermType::New(1, "Non-descriptor", EnLangId);
  LwOnto->GetTermTypeBs()->AddTermType(D_TermType);
  LwOnto->GetTermTypeBs()->AddTermType(ND_TermType);}

  // create link-types
  {PLwLinkType BT_LinkType=TLwLinkType::New(0, "BT", EnLangId, "Broader-Term");
  PLwLinkType NT_LinkType=TLwLinkType::New(1, "NT", EnLangId, "Narrower-Term");
  PLwLinkType RT_LinkType=TLwLinkType::New(2, "RT", EnLangId, "Related-Term");
  PLwLinkType UF_LinkType=TLwLinkType::New(3, "UF", EnLangId, "Used-For");
  PLwLinkType USE_LinkType=TLwLinkType::New(4, "USE", EnLangId, "Used-By");
  LwOnto->GetLinkTypeBs()->AddLinkType(BT_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(NT_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(RT_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(UF_LinkType);
  LwOnto->GetLinkTypeBs()->AddLinkType(USE_LinkType);}

  // load ontology file
  TStr AsfaOntoFNm=NrFPath+"asfa_xml_20060522.xml";
  printf("Loading '%s' ...", AsfaOntoFNm.CStr());
  PXmlDoc AsfaXmlDoc=TXmlDoc::LoadTxt(AsfaOntoFNm);
  IAssert(AsfaXmlDoc->IsOk());
  TXmlTokV ConceptXmlTokV;
  AsfaXmlDoc->GetTagTokV("THESAURUS|CONCEPT", ConceptXmlTokV);
  printf(" Done.\n");

  // create terms
  {printf("Creating terms ...");
  for (int ConceptN=0; ConceptN<ConceptXmlTokV.Len(); ConceptN++){
    PXmlTok ConceptXmlTok=ConceptXmlTokV[ConceptN];
    // term-name
    TStr TermNm;
    if (ConceptXmlTok->IsSubTag("NON-DESCRIPTOR")){
      TermNm=ConceptXmlTok->GetTagTokStr("NON-DESCRIPTOR");}
    else if (ConceptXmlTok->IsSubTag("DESCRIPTOR")){
      TermNm=ConceptXmlTok->GetTagTokStr("DESCRIPTOR");}
    // term-type
    TStr TermTypeNm=ConceptXmlTok->GetTagTokStr("TYP");
    int TermTypeId=LwOnto->GetTermTypeBs()->GetTermTypeId(TermTypeNm, EnLangId);
    // description
    TStr DescStr;
    if (ConceptXmlTok->IsSubTag("SN")){
      DescStr=ConceptXmlTok->GetTagTokStr("SN");
      DescStr.ChangeChAll('\r', ' '); DescStr.ChangeChAll('\n', ' ');
      DescStr.ChangeStrAll("  ", " "); DescStr.ToTrunc();
    }
    // create term
    PLwTerm Term=TLwTerm::New(-1, TermNm, EnLangId, TermTypeId, DescStr);
    LwOnto->GetTermBs()->AddTermGetTermId(Term);
  }
  printf(" Done. (%d)\n", LwOnto->GetTermBs()->GetTerms());}

  // create links
  {printf("Creating links ...");
  for (int ConceptN=0; ConceptN<ConceptXmlTokV.Len(); ConceptN++){
    PXmlTok ConceptXmlTok=ConceptXmlTokV[ConceptN];
    // source-term-name
    TStr TermNm1;
    if (ConceptXmlTok->IsSubTag("NON-DESCRIPTOR")){
      TermNm1=ConceptXmlTok->GetTagTokStr("NON-DESCRIPTOR");}
    else if (ConceptXmlTok->IsSubTag("DESCRIPTOR")){
      TermNm1=ConceptXmlTok->GetTagTokStr("DESCRIPTOR");}
    int TermId1=LwOnto->GetTermBs()->GetTermId(TermNm1, EnLangId);
    // links
    for (int SubTokN=0; SubTokN<ConceptXmlTok->GetSubToks(); SubTokN++){
      PXmlTok SubTok=ConceptXmlTok->GetSubTok(SubTokN);
      if (SubTok->IsTag()){
        TStr LinkTypeNm=SubTok->GetTagNm();
        if (LwOnto->GetLinkTypeBs()->IsLinkType(LinkTypeNm, EnLangId)){
          // destination-term-name
          TStr TermNm2=ConceptXmlTok->GetTagTokStr(LinkTypeNm);
          int TermId2=LwOnto->GetTermBs()->GetTermId(TermNm2, EnLangId);
          int LinkTypeId=LwOnto->GetLinkTypeBs()->GetLinkTypeId(LinkTypeNm, EnLangId);
          LwOnto->GetLinkBs()->AddLink(TermId1, LinkTypeId, TermId2);
        }
      }
    }
  }
  printf(" Done. (%d)\n", LwOnto->GetLinkBs()->GetLinks());}

  // return ontology
  return LwOnto;
}