示例#1
0
文件: google.cpp 项目: Accio/snap
void TGgSchRef::GetAuthNmVPubStr(
 const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){
  // split input string into two parts
  TStr AuthNmVStr; TStr PubStr;
  AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr);
  // author-names string
  AuthNmVStr.SplitOnAllCh(',', AuthNmV, true);
  for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){
    AuthNmV[AuthN].ToTrunc();
  }
  if ((!AuthNmV.Empty())&&
   ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){
    AuthNmV.DelLast();
  }
  // publication-name & publication-year string
  TStr OriginStr; TStr LinkStr;
  PubStr.SplitOnStr(OriginStr, " - ", LinkStr);
  OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr);
  PubNm.ToTrunc(); PubYearStr.ToTrunc();
  if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubYearStr.GetSubStr(0, 3);
  } else
  if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubNm.GetSubStr(0, 3); PubNm="";
  } else {
    PubYearStr="";
  }
}
示例#2
0
// Splits a free-form author list into individual names and stores the
// standardized form of each one in StdNameV (previous content is discarded).
// Names whose standardized form is empty are skipped.
void TStrUtil::GetStdNameV(TStr AuthorNames, TStrV& StdNameV) {
  // normalize the input: newlines become spaces, everything lower-case
  AuthorNames.ChangeChAll('\n', ' ');
  AuthorNames.ToLc();

  // first cut: the word "and" surrounded by spaces
  TStrV PieceV;
  AuthorNames.SplitOnStr(" and ", PieceV);

  // then refine every piece on each separator character in turn
  // (',' appears twice to mirror the original sequence of passes)
  const char SepChA[] = {',', '&', ',', ';'};
  const int SepChs = int(sizeof(SepChA) / sizeof(SepChA[0]));
  for (int SepN = 0; SepN < SepChs; SepN++) {
    TStrV RefinedV, PartV;
    for (int PieceN = 0; PieceN < PieceV.Len(); PieceN++) {
      PieceV[PieceN].SplitOnAllCh(SepChA[SepN], PartV);
      RefinedV.AddV(PartV);
    }
    PieceV = RefinedV;
  }

  // standardize names, keeping only the non-empty results
  StdNameV.Clr();
  for (int NameN = 0; NameN < PieceV.Len(); NameN++) {
    const TStr StdName = GetStdName(PieceV[NameN]);
    if (StdName.Empty()) { continue; }
    StdNameV.Add(StdName);
  }
}
void TCordisEuPart::ExtrEuPartV(const TStr& AllEuPartStr, TCordisEuPartV& EuPartV){
  TStrV EuPartStrV; AllEuPartStr.SplitOnStr("Organisation Type:", EuPartStrV);
  for (int EuPartN=1; EuPartN<EuPartStrV.Len(); EuPartN++){
    TStr EuPartStr=EuPartStrV[EuPartN];
    TStr WcStr=
     "</span>*</td>"
     "*Organisation:</span>*<br>"
     "*</td>*";
    TStrV StarStrV;
    if (EuPartStr.IsWcMatch(WcStr, StarStrV)){
      PCordisEuPart EuPart=TCordisEuPart::New();
      EuPart->DeptNm=ExtrDeptNm(EuPartStr);
      EuPart->OrgNm=TXmlLx::GetPlainStrFromXmlStr(StarStrV[2].GetTrunc());
      EuPart->OrgTypeNm=StarStrV[0].GetTrunc();
      EuPart->CountryNm=ExtrCountry(StarStrV[3].GetTrunc());
      EuPart->CoordP=(EuPartN==1);
      printf("   Partner: '%s'/'%s'/'%s'/'%s'/'%s'\n",
       EuPart->DeptNm.CStr(), EuPart->OrgNm.CStr(),
       EuPart->OrgTypeNm.CStr(), EuPart->CountryNm.CStr(),
       TBool::GetStr(EuPart->CoordP).CStr());
      EuPartV.Add(EuPart);
    } else {
      printf("Partner Fields Not Found!\n");
    }
  }
}
示例#4
0
// Builds a cluster record from one tab-separated line.
// The line must provide at least 14 columns; the columns are read by
// position as listed in the enum below.
DCluster::DCluster(TStr LineInput) {
  // Column positions within the tab-separated record
  enum {
    ColId = 0, ColStart = 1, ColEnd = 2,
    ColUnique = 3, ColSize = 4, ColNumPeaks = 5, ColRepStrLen = 6,
    ColRepStr = 7, ColRepURL = 8,
    ColFirst = 9, ColLast = 10, ColPeak = 11,
    ColArchived = 12, ColDiscardState = 13
  };

  TStrV FieldV;
  LineInput.SplitOnStr("\t", FieldV);

  // Identifier and start/end timestamps
  Id = TInt(FieldV[ColId].GetInt());
  Start = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColStart]);
  End = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColEnd]);

  // Length of the span in days, counting both the first and the last day
  uint FirstDay = Start.GetInUnits(tmuDay);
  uint LastDay = End.GetInUnits(tmuDay);
  DiffDay = LastDay - FirstDay + 1;

  // Numeric statistics
  Unique = TInt(FieldV[ColUnique].GetInt());
  Size = TInt(FieldV[ColSize].GetInt());
  NumPeaks = TInt(FieldV[ColNumPeaks].GetInt());
  RepStrLen = TInt(FieldV[ColRepStrLen].GetInt());

  // Representative string and URL
  RepStr = FieldV[ColRepStr];
  RepURL = FieldV[ColRepURL];

  // First / last / peak timestamps
  First = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColFirst]);
  Last = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColLast]);
  Peak = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColPeak]);

  // Archive flag: initialised to true before IsBool() is asked to parse
  // the column
  bool IsArchived = true;
  FieldV[ColArchived].IsBool(IsArchived);
  Archived = TBool(IsArchived);

  DiscardState = TInt(FieldV[ColDiscardState].GetInt());
}
示例#5
0
/// Splits on the first occurrence of the target string.
/// If the target string is not found, the whole string is returned as the left side.
// void SplitOnStr(TStr& LStr, const TStr& SplitStr, TStr& RStr) const;
//
// Checks the two-output overload of SplitOnStr: separator at the start, in
// the middle, at the end, covering the whole string, absent, and with an
// empty source string.
TEST(TStr, SplitOnStr) {
	const TStr Str = "abcd";
	const TStr EmptyStr = "";
	TStr LStr, RStr;
	
	// separator at the very start: left side is empty
	Str.SplitOnStr(LStr, "ab", RStr);
	EXPECT_EQ(LStr, "");
	EXPECT_EQ(RStr, "cd");
	
	// separator in the middle: both sides are non-empty
	Str.SplitOnStr(LStr, "bc", RStr);
	EXPECT_EQ(LStr, "a");
	EXPECT_EQ(RStr, "d");
	
	// separator at the very end: right side is empty
	Str.SplitOnStr(LStr, "cd", RStr);
	EXPECT_EQ(LStr, "ab");
	EXPECT_EQ(RStr, "");
	
	// separator equals the whole string: both sides are empty
	Str.SplitOnStr(LStr, "abcd", RStr);
	EXPECT_EQ(LStr, "");
	EXPECT_EQ(RStr, "");
	
	// no match: the whole string ends up on the left, the right side is empty
	Str.SplitOnStr(LStr, "fg", RStr);
	EXPECT_EQ(LStr, Str);
	EXPECT_EQ(RStr, "");
	// separator longer than the string
	Str.SplitOnStr(LStr, "abcde", RStr);
	EXPECT_EQ(LStr, Str);
	EXPECT_EQ(RStr, "");
	// an empty separator is expected to behave like a no-match
	Str.SplitOnStr(LStr, "", RStr);
	EXPECT_EQ(LStr, Str);
	EXPECT_EQ(RStr, "");
	
	// empty source string: both sides are empty for any separator
	EmptyStr.SplitOnStr(LStr, "aa", RStr);
	EXPECT_EQ(LStr, "");
	EXPECT_EQ(RStr, "");
	EmptyStr.SplitOnStr(LStr, "", RStr);
	EXPECT_EQ(LStr, "");
	EXPECT_EQ(RStr, "");
}
/////////////////////////////////////////////////
// EuPartner
// Returns the country line of an address whose lines are separated by
// "<br>": the last line, or the one before it when the last line is empty.
// CR/LF characters are removed from the result. Prints a notice to stdout
// and returns an empty string when no country could be determined.
TStr TCordisEuPart::ExtrCountry(const TStr& AddrStr){
  TStrV AddrLnV; AddrStr.SplitOnStr("<br>", AddrLnV);
  const int AddrLns=AddrLnV.Len();
  TStr CountryNm;
  if (AddrLns==0){
    // nothing to pick from
    printf("Country Field Not Found!\n");
    return CountryNm;
  }
  CountryNm=AddrLnV[AddrLns-1];
  // fall back to the previous line when the last one is empty
  if (CountryNm.Empty()&&(AddrLns>1)){
    CountryNm=AddrLnV[AddrLns-2];
  }
  // strip line-break characters
  CountryNm.DelChAll('\r');
  CountryNm.DelChAll('\n');
  if (CountryNm.Empty()){
    printf("Country Field Not Found!\n");
  }
  return CountryNm;
}
// Returns the host part of an address: the text between an optional
// "http…//" prefix and the first following '/'.
//
//   "http://example.com/a/b"  -> "example.com"
//   "example.com/a/b"         -> "example.com"
//
// The "//" is treated as the scheme separator only when it starts at the
// first '/' of the address. Previously any occurrence of "http" selected the
// scheme branch, which had two consequences:
//   - an address containing "http" but no "//" (e.g. "httpd.example.com/x")
//     produced an empty result;
//   - a "//" deep inside the path or query (e.g. "a.com/r?u=http://b.com")
//     was mistaken for the scheme separator and the wrong host was returned.
inline TStr getWebsite(TStr fulladdress)
{
	TStr scheme,rest,tail,host;
	// positions are -1 when the pattern does not occur
	const int firstSlash=fulladdress.SearchCh('/',0);
	const int schemeSep=fulladdress.SearchStr(TStr("//"),0);
	const bool hasScheme=
		(fulladdress.SearchStr(TStr("http"),0)>=0)&&
		(schemeSep>=0)&&(firstSlash==schemeSep);
	if(hasScheme)
	{
		// drop everything up to and including the "//"
		fulladdress.SplitOnStr(scheme,TStr("//"),rest);
	}
	else
	{
		rest=fulladdress;
	}
	// the host ends at the first '/' (or at the end of the string)
	rest.SplitOnCh(host,'/',tail);
	return host;
}
示例#8
0
// Builds a quote record from one tab-separated line.
// The line must provide at least 10 columns; column 0 is not read here.
DQuote::DQuote(TStr LineInput) {
  // Column positions within the tab-separated record
  enum {
    ColId = 1, ColSize = 2, ColNumPeaks = 3, ColStrLen = 4,
    ColStr = 5, ColRepURL = 6,
    ColFirst = 7, ColLast = 8, ColPeak = 9
  };

  TStrV FieldV;
  LineInput.SplitOnStr("\t", FieldV);

  // Identifier and numeric statistics
  Id = TInt(FieldV[ColId].GetInt());
  Size = TInt(FieldV[ColSize].GetInt());
  NumPeaks = TInt(FieldV[ColNumPeaks].GetInt());
  StrLen = TInt(FieldV[ColStrLen].GetInt());

  // Quote text and representative URL
  Str = FieldV[ColStr];
  RepURL = FieldV[ColRepURL];

  // First / last / peak timestamps
  First = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColFirst]);
  Last = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColLast]);
  Peak = TSecTm::GetDtTmFromYmdHmsStr(FieldV[ColPeak]);
}
示例#9
0
/// Splits on all occurrences of SplitStr.
// void SplitOnStr(const TStr& SplitStr, TStrV& StrV) const;
//
// NOTE(review): every expectation below is commented out, so this test only
// exercises the call and cannot fail on a wrong result.
TEST(TStr, SplitOnStr_VectorOutput) {
	TStr Str = "xybcxybcxy";
	TStrV StrV;
	Str.SplitOnStr("xy", StrV);
	/*
	Debug dump of the pieces (disabled).
	NOTE(review): StrV[i] is passed to "%s" as-is; it would presumably need
	.CStr() before this can be re-enabled - confirm.
	for (int i = 0; i < StrV.Len(); i++) {
		printf("%s\n", StrV[i]);
	}*/
	// Disabled expectations for the vector overload.
	// NOTE(review): whether leading/trailing empty pieces are kept is not
	// visible here - confirm the expected length before re-enabling.
	//EXPECT_EQ(2, StrV.Len());
	//EXPECT_EQ(TStr("bc"), StrV[0]);
	//EXPECT_EQ(TStr("bc"), StrV[1]);
	
	// The remaining disabled lines call SplitOnAllCh, not SplitOnStr.
	//
	//// edge cases
	//Str = "a";
	//Str.SplitOnAllCh('a', StrV, true);
	//EXPECT_EQ(0, StrV.Len());
	//Str.SplitOnAllCh('a', StrV, false);
	//EXPECT_EQ(2, StrV.Len());
	//Str = "aa";
	//Str.SplitOnAllCh('a', StrV, true);
	//EXPECT_EQ(0, StrV.Len());
	//Str.SplitOnAllCh('a', StrV, false);
	//EXPECT_EQ(3, StrV.Len());
	//Str = "";
	//Str.SplitOnAllCh('a', StrV, true);
	//EXPECT_EQ(0, StrV.Len());
	//Str.SplitOnAllCh('a', StrV, false);
	//EXPECT_EQ(1, StrV.Len());
	//// non match
	//Str = "abc";
	//Str.SplitOnAllCh('x', StrV, true);
	//EXPECT_EQ(1, StrV.Len());
	//Str.SplitOnAllCh('x', StrV, false);
	//EXPECT_EQ(1, StrV.Len());
}