Example #1
0
void TMultinomial::AddFtr(const TStrV& StrV, const TFltV& FltV, TIntFltKdV& SpV) const {
    // Builds a sparse feature vector from string keys (StrV) with optional
    // explicit weights (FltV): unknown keys are dropped, duplicate feature ids
    // are merged by summing, and the result is optionally binarized/normalized.
    EAssertR(FltV.Empty() || (StrV.Len() == FltV.Len()),
        "TMultinomial::AddFtr:: String and double values not aligned");
    // collect known features with positive weights
    SpV.Gen(StrV.Len(), 0);
    const bool HasVals = !FltV.Empty();
    for (int KeyN = 0; KeyN < StrV.Len(); KeyN++) {
        const int FtrId = FtrGen.GetFtr(StrV[KeyN]);
        // skip keys not seen during updates
        if (FtrId == -1) { continue; }
        // without explicit weights every key counts as 1.0
        const double Wgt = HasVals ? FltV[KeyN].Val : 1.0;
        // drop (near-)zero weights so they don't pollute the sparse vector
        if (Wgt > 1e-16) { SpV.Add(TIntFltKd(FtrId, Wgt)); }
    }
    // sort by feature id so duplicates become adjacent
    SpV.Sort();
    // in-place merge of equal ids; LastN marks the last unique element
    int LastN = 0;
    for (int CurN = 1; CurN < SpV.Len(); CurN++) {
        if (SpV[CurN].Key == SpV[LastN].Key) {
            // same feature id: accumulate the weight
            SpV[LastN].Dat += SpV[CurN].Dat;
        } else {
            // move the element down to the next unique slot
            SpV[++LastN] = SpV[CurN];
        }
    }
    // drop the leftover tail after merging
    SpV.Trunc(LastN + 1);
    // binary features: force all weights to one
    if (IsBinary()) { for (TIntFltKd& Sp : SpV) { Sp.Dat = 1.0; } }
    // final normalization, if requested
    if (IsNormalize()) { TLinAlg::Normalize(SpV); }
}
Example #2
0
//////////////////////////////////////////////////////////////////////////
// String-To-Words
void TStrParser::DocStrToWIdV(const TStr& _DocStr, TIntV& WordIdV, const bool& Stemm) {
    // Tokenizes the document into word ids (adding unseen words to the
    // vocabulary with a zero count) and updates per-word document-frequency
    // statistics. Optionally stems each token first.
    TStr DocStr = _DocStr.GetUc();  // to upper case
    TStrV WordV; DocStr.SplitOnWs(WordV); int WordN = WordV.Len();
    WordIdV.Reserve(WordN, 0);

    // only construct the (Porter) stemmer when stemming is actually requested
    PStemmer Stemmer = Stemm ? TStemmer::New(stmtPorter) : PStemmer();
    TIntH WordsInDoc; // distinct word ids seen in this document
    for (int WordC = 0; WordC < WordN; WordC++) {
        const TStr WordStr = Stemm ? Stemmer->GetStem(WordV[WordC]) : WordV[WordC];
        int WId = GetWId(WordStr);
        if (WId == -1) {
            // unseen word: add it to the vocabulary with a zero DF count
            WId = WordToIdH.AddKey(WordStr);
            WordToIdH[WId] = 0;
        }
        WordIdV.Add(WId);

        // is it the first time we see this word in this doc?
        if (!WordsInDoc.IsKey(WId)) WordsInDoc.AddKey(WId);
    }

    // DF statistics: +1 for every distinct word appearing in this document
    DocsParsed++;
    for (int i = 0, l = WordsInDoc.Len(); i < l; i++)
        WordToIdH[WordsInDoc.GetKey(i)]++;

    Assert(WordV.Len() == WordIdV.Len());
}
Example #3
0
//////////////////////////////////////
// URL-Redirect-Function
TSASFunRedirect::TSASFunRedirect(const TStr& FunNm,
		const TStr& SettingFNm): TSAppSrvFun(FunNm, saotUndef) { 

	// Loads a tab-separated redirect table from SettingFNm.
	// A line with a non-empty first field "<OrgFun>\t<NewFun>" starts a new
	// redirect mapping; subsequent lines with an empty first field
	// ("\t<FldNm>\t<FldVal>") add parameters to the most recent mapping,
	// where a value starting with '$' maps an incoming field name instead
	// of providing a fixed value.
	printf("Loading redirects %s\n", FunNm.CStr());
	TFIn FIn(SettingFNm); TStr LnStr, OrgFunNm;
	while (FIn.GetNextLn(LnStr)) {
		TStrV PartV;  LnStr.SplitOnAllCh('\t', PartV, false);
		if (PartV.Empty()) { continue; }
		if (PartV[0].Empty()) {
			// parameters for the current function (OrgFunNm set by an earlier line)
			EAssert(PartV.Len() >= 3);
			TStr FldNm = PartV[1];
			TStr FldVal = PartV[2];
			if (FldVal.StartsWith("$")) {
				// "$Name": map the incoming field (name after '$') to FldNm
				MapH.GetDat(OrgFunNm).FldNmMapH.AddDat(FldVal.Right(1), FldNm);
			} else {
				// fixed field value attached to every redirected request
				MapH.GetDat(OrgFunNm).FldNmValPrV.Add(TStrKd(FldNm, FldVal));
			}
		} else {
			// new function mapping; remember its name for the parameter lines below
			EAssert(PartV.Len() >= 2);
			OrgFunNm = PartV[0];
			MapH.AddDat(OrgFunNm).FunNm = PartV[1];
			printf("  %s - %s\n", PartV[0].CStr(), PartV[1].CStr());
		}
	}
	printf("Done\n");
}
Example #4
0
bool TBagOfWords::Update(const TStrV& TokenStrV) {    
    // Updates document-frequency (DF) statistics from one document's tokens
    // and, in non-hashing mode, grows the vocabulary. Returns true iff the
    // feature-space dimensionality changed (a new token was added); hashing
    // mode has a fixed dimension, so there it always returns false.
    // Generate Ngrams if necessary
	TStrV NgramStrV;
    GenerateNgrams(TokenStrV, NgramStrV);

    // process tokens to update DF counts
    bool UpdateP = false;
    if (IsHashing()) {  
        // consolidate tokens and get their hashed IDs
        // (a set, so each token id counts at most once per document)
        TIntSet TokenIdH;
        for (int TokenStrN = 0; TokenStrN < NgramStrV.Len(); TokenStrN++) {
            const TStr& TokenStr = NgramStrV[TokenStrN];
            // hashing trick: token id is the hash modulo the fixed dimension
            TInt TokenId = TokenStr.GetHashTrick() % HashDim;
            TokenIdH.AddKey(TokenId);
            // optionally remember which words fall into each hash bucket
            if (IsStoreHashWords()) { HashWordV[TokenId].AddKey(TokenStr); }
        }
        // update document counts
        int KeyId = TokenIdH.FFirstKeyId();
        while (TokenIdH.FNextKeyId(KeyId)) {
            const int TokenId = TokenIdH.GetKey(KeyId);
            // update DF
            DocFqV[TokenId]++;
        }
    } else {
        // consolidate tokens (each distinct token counts once per document)
        TStrH TokenStrH;
        for (int TokenStrN = 0; TokenStrN < NgramStrV.Len(); TokenStrN++) {
            const TStr& TokenStr = NgramStrV[TokenStrN];
            TokenStrH.AddKey(TokenStr);
        }
        // update document counts and update vocabulary with new tokens
        int KeyId = TokenStrH.FFirstKeyId();
        while (TokenStrH.FNextKeyId(KeyId)) {
            // get token
            const TStr& TokenStr = TokenStrH.GetKey(KeyId);
            // look the token up in the vocabulary
            int TokenId = TokenSet.GetKeyId(TokenStr);
            if (TokenId == -1) {
                // new token, remember the dimensionality change
                UpdateP = true;
                // remember the new token
                TokenId = TokenSet.AddKey(TokenStr);
                // increase document count table
                const int TokenDfId = DocFqV.Add(0);
                // increase also the old count table
                OldDocFqV.Add(0.0);
                // make sure the DF vectors and TokenSet are still in sync
                IAssert(TokenId == TokenDfId);
                IAssert(DocFqV.Len() == OldDocFqV.Len());
            }
            // document count update
            DocFqV[TokenId]++;
        }
    }
    // update document count
    Docs++;
    // tell if dimension changed
    return UpdateP;
}
void TTokenizer::GetTokens(const TStrV& TextV, TVec<TStrV>& TokenVV) const {
	// Tokenizes every text in TextV into the matching slot of TokenVV.
	// The caller must pre-size TokenVV to TextV.Len() (see assert below).
	IAssert(TextV.Len() == TokenVV.Len()); // shall we rather say Tokens.Gen(Texts.Len(), 0); ?
	for (int TextN = 0; TextN < TextV.Len(); TextN++) {
		TStrV& TokenV = TokenVV[TextN];
		// BUG FIX: the original called TokenVV.Gen(32,0), which cleared the
		// OUTER vector mid-iteration (dangling TokenV reference, broken
		// precondition). Reserve capacity on the inner token vector instead.
		TokenV.Gen(32, 0); // assume there will be at least 32 tokens, to avoid small resizes
		GetTokens(TextV[TextN], TokenV);
	}
}
Example #6
0
void TNEANetMP::Dump(FILE *OutF) const {
  // Writes a human-readable dump of the network to OutF: every node with its
  // int/str/flt attributes and in/out edge ids, then every edge with its
  // endpoints and attributes. printf field widths are sized from the counts.
  // NOTE(review): with 0 or 1 nodes (or edges) ceil(log10(..)) yields -inf/0,
  // so the widths degenerate — confirm callers only dump non-trivial nets.
  const int NodePlaces = (int) ceil(log10((double) GetNodes()));
  const int EdgePlaces = (int) ceil(log10((double) GetEdges()));
  fprintf(OutF, "-------------------------------------------------\nDirected Node-Edge Network: nodes: %d, edges: %d\n", GetNodes(), GetEdges());
  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
    fprintf(OutF, "  %*d]\n", NodePlaces, NodeI.GetId());
    // load node attributes
    TIntV IntAttrN;
    IntAttrValueNI(NodeI.GetId(), IntAttrN);
    fprintf(OutF, "    nai[%d]", IntAttrN.Len());
    for (int i = 0; i < IntAttrN.Len(); i++) {
      fprintf(OutF, " %*i", NodePlaces, IntAttrN[i]()); }
    TStrV StrAttrN;
    StrAttrValueNI(NodeI.GetId(), StrAttrN);
    fprintf(OutF, "    nas[%d]", StrAttrN.Len());
    for (int i = 0; i < StrAttrN.Len(); i++) {
      fprintf(OutF, " %*s", NodePlaces, StrAttrN[i]()); }
    TFltV FltAttrN;
    FltAttrValueNI(NodeI.GetId(), FltAttrN);
    fprintf(OutF, "    naf[%d]", FltAttrN.Len());
    for (int i = 0; i < FltAttrN.Len(); i++) {
      fprintf(OutF, " %*f", NodePlaces, FltAttrN[i]()); }

    // incoming and outgoing edge ids of the node
    fprintf(OutF, "    in[%d]", NodeI.GetInDeg());
    for (int edge = 0; edge < NodeI.GetInDeg(); edge++) {
      fprintf(OutF, " %*d", EdgePlaces, NodeI.GetInEId(edge)); }
    fprintf(OutF, "\n");
    fprintf(OutF, "    out[%d]", NodeI.GetOutDeg());
    for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
      fprintf(OutF, " %*d", EdgePlaces, NodeI.GetOutEId(edge)); }
    fprintf(OutF, "\n");
  }
  // edges: "<edge id>]  <src>  ->  <dst>" plus edge attributes
  for (TEdgeI EdgeI = BegEI(); EdgeI < EndEI(); EdgeI++) {
    fprintf(OutF, "  %*d]  %*d  ->  %*d", EdgePlaces, EdgeI.GetId(), NodePlaces, EdgeI.GetSrcNId(), NodePlaces, EdgeI.GetDstNId());

    // load edge attributes
    TIntV IntAttrE;
    IntAttrValueEI(EdgeI.GetId(), IntAttrE);
    fprintf(OutF, "    eai[%d]", IntAttrE.Len());
    for (int i = 0; i < IntAttrE.Len(); i++) {
      fprintf(OutF, " %*i", EdgePlaces, IntAttrE[i]());
    }
    TStrV StrAttrE;
    StrAttrValueEI(EdgeI.GetId(), StrAttrE);
    fprintf(OutF, "    eas[%d]", StrAttrE.Len());
    for (int i = 0; i < StrAttrE.Len(); i++) {
      fprintf(OutF, " %*s", EdgePlaces, StrAttrE[i]());
    }
    TFltV FltAttrE;
    FltAttrValueEI(EdgeI.GetId(), FltAttrE);
    fprintf(OutF, "    eaf[%d]", FltAttrE.Len());
    for (int i = 0; i < FltAttrE.Len(); i++) {
      fprintf(OutF, " %*f", EdgePlaces, FltAttrE[i]());
    }
    fprintf(OutF, "\n");
  }
  fprintf(OutF, "\n");
}
Example #7
0
v8::Local<v8::Value> TNodeJsUtil::GetStrArr(const TStrV& StrV) {
    // Converts a glib string vector into a JavaScript array of strings.
    v8::Isolate* Isolate = v8::Isolate::GetCurrent();
    v8::EscapableHandleScope HandleScope(Isolate);
    const int Strs = StrV.Len();
    v8::Local<v8::Array> JsArr = v8::Array::New(Isolate, Strs);
    for (int ElN = 0; ElN < Strs; ElN++) {
        // each element becomes a UTF-8 JS string
        JsArr->Set(ElN, v8::String::NewFromUtf8(Isolate, StrV[ElN].CStr()));
    }
    // escape the handle so it survives this scope
    return HandleScope.Escape(JsArr);
}
Example #8
0
// copy files for a particular folder info
void TBackupProfile::CopyFolder(const TStr& BaseTargetFolder, const TStr& SourceFolder, const TStrV& Extensions, const TStrV& SkipIfContainingV, const bool& IncludeSubfolders, const bool& ReportP, TStr& ErrMsg)
{
    // Copies files (matching Extensions) from SourceFolder into
    // BaseTargetFolder/<last path component>/, skipping files whose name
    // contains any string from SkipIfContainingV; optionally recurses into
    // subfolders. Errors are appended to ErrMsg rather than thrown, so one
    // failing folder does not abort the whole backup.
    try {
        // get the name of the source folder
        TStrV PathV; TDir::SplitPath(SourceFolder, PathV);
        EAssert(PathV.Len() > 0);

        // create the folder in the base target folder
        TStr TargetFolder = BaseTargetFolder + PathV[PathV.Len() - 1] + "/";
        if (!TDir::Exists(TargetFolder))
            TDir::GenDir(TargetFolder);

        // find files to be copied
        TStrV FileV;
        TFFile::GetFNmV(SourceFolder, Extensions, false, FileV);

        TStrV FolderV;
        // copy them
        for (int N = 0; N < FileV.Len(); N++) {
            // we found a file
            if (TFile::Exists(FileV[N])) {
                const TStr FileName = TDir::GetFileName(FileV[N]);
                // is this a file that we wish to ignore?
                bool ShouldCopy = true;
                for (int S = 0; S < SkipIfContainingV.Len(); S++) {
                    if (FileName.SearchStr(SkipIfContainingV[S]) >= 0) {
                        ShouldCopy = false;
                        break; // one match is enough to skip the file
                    }
                }
                if (!ShouldCopy)
                    continue;
                const TStr TargetFNm = TargetFolder + FileName;
                if (ReportP)
                    TNotify::StdNotify->OnStatusFmt("Copying file: %s\r", FileName.CStr());
                TFile::Copy(FileV[N], TargetFNm);
            }
            // not a file - remember the folder for the recursion below
            else {
                FolderV.Add(FileV[N]);
            }
        }

        // recursively copy subfolders, if requested
        if (IncludeSubfolders) {
            for (int N = 0; N < FolderV.Len(); N++)
                CopyFolder(TargetFolder, FolderV[N], Extensions, SkipIfContainingV, IncludeSubfolders, ReportP, ErrMsg);
        }
    }
    catch (PExcept E) {
        if (ErrMsg != "")
            ErrMsg += "\n";
        ErrMsg += "Exception while copying from " + SourceFolder + ": " + E->GetMsgStr();
    }
    catch (...) {
        if (ErrMsg != "")
            ErrMsg += "\n";
        ErrMsg += "Exception while copying from " + SourceFolder + ": " + "Unrecognized exception occured.";
    }
}
Example #9
0
TStrV TEnv::GetIfArgPrefixStrV(
    const TStr& PrefixStr, TStrV& DfValV, const TStr& DNm) const {
    // Collects all command-line arguments starting with PrefixStr (with the
    // prefix stripped); falls back to DfValV when none match. When there are
    // too few arguments, only a usage line is printed and an empty vector is
    // returned.
    TStrV ArgValV;
    if (Env.GetArgs()<=MnArgs) {
        // 'usage' argument message
        if (!SilentP) {
            printf("   %s%s (default:", PrefixStr.CStr(), DNm.CStr());
            for (int DfValN=0; DfValN<DfValV.Len(); DfValN++) {
                if (DfValN>0) {
                    printf(", ");
                }
                printf("'%s'", DfValV[DfValN].CStr());
            }
            printf(")\n");
        }
        return ArgValV;
    } else {
        // argument & value message
        TStr ArgValVChA;
        for (int ArgN=0; ArgN<GetArgs(); ArgN++) {
            // get argument string
            TStr ArgStr=GetArg(ArgN);
            if (ArgStr.StartsWith(PrefixStr)) {
                // extract & add argument value (prefix removed in place)
                ArgStr.DelStr(PrefixStr);
                ArgValV.Add(ArgStr);
                // add to message string
                if (ArgValV.Len()>1) {
                    ArgValVChA+=", ";
                }
                ArgValVChA+=ArgValV.Last();
            }
        }
        // no matching arguments: use the provided defaults
        if (ArgValV.Empty()) {
            ArgValV=DfValV;
        }
        // output argument values
        TChA MsgChA;
        MsgChA+="  "+DNm;
        MsgChA+=" (";
        MsgChA+=PrefixStr;
        MsgChA+=")=";
        for (int ArgValN=0; ArgValN<ArgValV.Len(); ArgValN++) {
            if (ArgValN>0) {
                MsgChA+=", ";
            }
            MsgChA+="'";
            MsgChA+=ArgValV[ArgValN];
            MsgChA+="'";
        }
        if (!SilentP) {
            TNotify::OnStatus(Notify, MsgChA);
        }
        return ArgValV;
    }
}
Example #10
0
// Test drawing of SNAP graphs using GraphViz with color labeling
TEST(GVizTest, DrawGViz) {

  // Load one undirected and one directed sample graph from edge-list files.
  PUNGraph UNGraph1;
  UNGraph1 = LoadEdgeList<PUNGraph>(TStr::Fmt("%s/sample_ungraph1.txt", DIRNAME));
  
  PNGraph NGraph1;
  NGraph1 = LoadEdgeList<PNGraph>(TStr::Fmt("%s/sample_ngraph1.txt", DIRNAME));

  // make sure the output directory exists
  mkdir(DIRNAME, S_IRWXU | S_IRWXG | S_IRWXO);
  
  // layout engine names, index-aligned with the TGVizLayout enum:
  TStrV LNames; //  gvlDot, gvlNeato, gvlTwopi, gvlCirco
  LNames.Add("Dot");
  LNames.Add("Neato");
  LNames.Add("Twopi");
  LNames.Add("Circo");
  
  // output formats under test
  TStrV Exts;
  Exts.Add("ps");
  //Exts.Add("gif");
  Exts.Add("png");
  
  // every layout x format x {undirected, directed} combination
  for (int i = 0; i < LNames.Len(); i++) {
    for (int e = 0; e < Exts.Len(); e++) {
      for (int d = 0; d < 2; d++) {
        
        // Baseline file has already been created (use as benchmark)
        TStr FNameBase = TStr::Fmt("%s/base_%s_%s.%s", DIRNAME, d ? "ngraph" : "ungraph" , LNames[i].CStr(), Exts[e].CStr());
        
        TStr FNameTest = TStr::Fmt("%s/test_%s_%s.%s", DIRNAME, d ? "ngraph" : "ungraph" , LNames[i].CStr(), Exts[e].CStr());
        
        // Remove test graph if it already exists
        remove(FNameTest.CStr());
        EXPECT_FALSE(fileExists(FNameTest.CStr()));
        
        // Draw new graph and check if created and equal to baseline (for ps only)
        if (d) {
          TSnap::DrawGViz(NGraph1, TGVizLayout(i), FNameTest, LNames[i], true);
        }
        else {
          TSnap::DrawGViz(UNGraph1, TGVizLayout(i), FNameTest, LNames[i], true);
        }
        // Check if file exists
        EXPECT_TRUE(fileExists(FNameTest.CStr()));
        
#ifdef __linux
        // Compare directly for ps files, (can't compare png and gif due to EXIF-labels)
        if (Exts[e] == "ps") {
          EXPECT_TRUE(compareFiles(FNameBase.CStr(), FNameTest.CStr()));
        }
#endif
      }
    }
  }

}
Example #11
0
bool TFolderBackup::RestoreLatest(const TStr& ProfileName, const TBackupProfile::ERestoringMode& RestoringMode) const
{
    // Restores the most recent backup for the given profile.
    // Returns false when the profile is unknown or has no backups yet.
    if (!ProfileH.IsKey(ProfileName)) { return false; }
    TStrV BackupFolderV;
    GetBackupFolders(ProfileName, BackupFolderV);
    if (BackupFolderV.Empty()) { return false; }
    // the last entry in the list is treated as the latest backup
    ProfileH.GetDat(ProfileName).Restore(BackupFolderV.Last(), RestoringMode, ReportP);
    return true;
}
Example #12
0
void TFtrGenBs::Update(const TStrV& FtrValV) {
	// Forwards each feature value to its matching generator for an update pass.
	// Requires one value per generator and update-state.
	EAssert(State == fgsUpdate);
	EAssert(FtrValV.Len() == FtrGenV.Len());
	try {
		const int Ftrs = FtrValV.Len();
		for (int FtrN = 0; FtrN < Ftrs; FtrN++) {
			FtrGenV[FtrN]->Update(FtrValV[FtrN]);
		}
	} catch (PExcept Ex) {
		// re-throw with feature-generation context attached
		TExcept::Throw(TStr::Fmt("Error feature generation: '%s'!", Ex->GetMsgStr().CStr()));
	}
}
/////////////////////////////////////////////////
// EuPartner
TStr TCordisEuPart::ExtrCountry(const TStr& AddrStr){
  // Extracts the country from a <br>-separated address: it is expected on
  // the last (or, when that is blank, the second-to-last) address line.
  TStrV LineV;
  AddrStr.SplitOnStr("<br>", LineV);
  TStr CountryStr;
  if (!LineV.Empty()){
    CountryStr = LineV.Last();
    // fall back to the line before when the last one is empty
    if (CountryStr.Empty() && (LineV.Len() > 1)){
      CountryStr = LineV[LineV.Len()-2];
    }
    // strip stray line-break characters
    CountryStr.DelChAll('\r');
    CountryStr.DelChAll('\n');
  }
  if (CountryStr.Empty()){
    printf("Country Field Not Found!\n");
  }
  return CountryStr;
}
Example #14
0
void TFtrGenBs::GenFtrV(const TStrV& FtrValV, TIntFltKdV& FtrSpV) const {
	EAssert(State == fgsGen);
	EAssert(FtrValV.Len() == FtrGenV.Len());
    try {
		int Offset = 0;
        for (int FtrValN = 0; FtrValN < FtrValV.Len(); FtrValN++) {
			const PFtrGen& FtrGen = FtrGenV[FtrValN];
			const TStr& FtrVal = FtrValV[FtrValN];
            FtrGen->Add(FtrVal, FtrSpV, Offset);
        }
    } catch (PExcept Ex) {
		TExcept::Throw(TStr::Fmt("Error feature generation: '%s'!", Ex->GetMsgStr().CStr()));
    }
}
Example #15
0
TStrV TEnv::GetIfArgPrefixStrV(const TStr& PrefixStr,
                               TStrV& DfValV,
                               const TStr& DNm) const {
    // Variant of the prefix-argument lookup that additionally splits each
    // matching argument's value on commas (e.g. "-o:a,b,c" yields three
    // values). Falls back to DfValV when no argument matches; with too few
    // arguments only a usage line is printed and an empty vector returned.
    TStrV ArgValV;
    if (Env.GetArgs() <= MnArgs) {
        // 'usage' argument message
        if (!SilentP) {
            printf("   %s%s (default:", PrefixStr.CStr(),
                   DNm.CStr());
            for (int DfValN = 0; DfValN < DfValV.Len();
                 DfValN++) {
                if (DfValN > 0) printf(", ");
                printf("%s", DfValV[DfValN].CStr());
            }
            printf(")\n");
        }
        return ArgValV;
    } else {
        // argument & value message
        TStrV Items;
        for (int ArgN = 0; ArgN < GetArgs(); ArgN++) {
            // get argument string
            TStr ArgStr = GetArg(ArgN);
            // does the argument start with the prefix?
            if (ArgStr.GetSubStr(0, PrefixStr.Len() - 1) ==
                PrefixStr) {
                // strip the prefix and split the remainder on commas
                TStr ArgVals = ArgStr.GetSubStr(
                    PrefixStr.Len(), ArgStr.Len());
                ArgVals.SplitOnAllCh(',', Items);
                for (int i = 0; i < Items.Len(); i++)
                    ArgValV.Add(Items[i]);
            }
        }
        // no matching argument: fall back to the defaults
        if (ArgValV.Empty()) ArgValV = DfValV;
        // output argument values
        TChA MsgChA;
        MsgChA += DNm;
        MsgChA += " (";
        MsgChA += PrefixStr;
        MsgChA += ")=";
        for (int ArgValN = 0; ArgValN < ArgValV.Len();
             ArgValN++) {
            if (ArgValN > 0) MsgChA += ", ";
            MsgChA += ArgValV[ArgValN];
        }
        if (!SilentP) TNotify::OnStatus(Notify, MsgChA);
        return ArgValV;
    }
}
Example #16
0
PJsonVal TJsonVal::NewArr(const TStrV& StrV) {
  // Wraps a string vector into a JSON array of string values.
  PJsonVal ArrVal = TJsonVal::NewArr();
  const int Strs = StrV.Len();
  for (int StrN = 0; StrN < Strs; StrN++) {
    ArrVal->AddToArr(TJsonVal::NewStr(StrV[StrN]));
  }
  return ArrVal;
}
Example #17
0
void TFtrGenToken::Add(const TStr& Val, TIntFltKdV& SpV, int& Offset) const {
    // Appends a TF-IDF weighted, L1-normalized bag-of-tokens representation
    // of Val to SpV, with feature ids shifted by Offset; afterwards Offset is
    // advanced by this generator's vocabulary size.
    // step (1): tokenize
    TStrV TokenStrV; GetTokenV(Val, TokenStrV);
    // step (2): aggregate token counts (only tokens known to the vocabulary)
    TIntH TokenFqH;
    for (int TokenStrN = 0; TokenStrN < TokenStrV.Len(); TokenStrN++) {
        const TStr& TokenStr = TokenStrV[TokenStrN];
        if (TokenH.IsKey(TokenStr)) { 
            const int TokenId = TokenH.GetKeyId(TokenStr);
            TokenFqH.AddDat(TokenId)++;
        }
    }
    // step (3): make a sparse vector out of it
    TIntFltKdV ValSpV(TokenFqH.Len(), 0);
    int KeyId = TokenFqH.FFirstKeyId();
    while (TokenFqH.FNextKeyId(KeyId)) {
        const int TokenId = TokenFqH.GetKey(KeyId);
        const int TokenFq = TokenFqH[KeyId]; // term frequency within Val
        const int TokenDocFq = TokenH[TokenId]; // document frequency of the token
        // inverse document frequency: log(#docs / doc-frequency)
        const double IDF = log(double(Docs) / double(TokenDocFq));
        ValSpV.Add(TIntFltKd(TokenId, double(TokenFq) * IDF));
    }
    ValSpV.Sort(); TLinAlg::NormalizeL1(ValSpV);
    // step (4): add the sparse vector to the final feature vector  
    for (int ValSpN = 0; ValSpN < ValSpV.Len(); ValSpN++) {
        const int Key = ValSpV[ValSpN].Key + Offset;
        const double Dat = ValSpV[ValSpN].Dat;
        SpV.Add(TIntFltKd(Key, Dat));
    }
    Offset += TokenH.Len(); 
}
Example #18
0
void TGgSchRef::GetAuthNmVPubStr(
 const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){
  // Parses a reference string of the shape "authors - publication - link"
  // (presumably a Google Scholar citation line — confirm with callers) into
  // the author list, publication name, and a 4-digit publication year.
  // split input string into two parts
  TStr AuthNmVStr; TStr PubStr;
  AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr);
  // author-names string: comma-separated, each name trimmed
  AuthNmVStr.SplitOnAllCh(',', AuthNmV, true);
  for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){
    AuthNmV[AuthN].ToTrunc();
  }
  // drop a trailing "..." or very short (initials-only) fragment
  if ((!AuthNmV.Empty())&&
   ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){
    AuthNmV.DelLast();
  }
  // publication-name & publication-year string
  TStr OriginStr; TStr LinkStr;
  PubStr.SplitOnStr(OriginStr, " - ", LinkStr);
  OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr);
  PubNm.ToTrunc(); PubYearStr.ToTrunc();
  // accept a 4-digit year either after the last comma or at the start of the
  // publication name; otherwise leave the year empty
  if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubYearStr.GetSubStr(0, 3);
  } else
  if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){
    PubYearStr=PubNm.GetSubStr(0, 3); PubNm="";
  } else {
    PubYearStr="";
  }
}
Example #19
0
TStr TNodeJsFPath::GetCanonicalPath(const TStr& FPath) {
    // Returns the canonical form of FPath: normalizes it to an absolute path,
    // then resolves "." and ".." components using a stack of path segments.
    // Get absolute path
    TStr AbsFPath = TStr::GetNrAbsFPath(FPath);
    // Remove any redundancies
    TStrV CanonV; AbsFPath.SplitOnAllCh('/', CanonV);
    TSStack<TStr> CanonS; TStr CurrStr;
    for (int ElN = 0; ElN < CanonV.Len(); ++ElN) {
        CurrStr = CanonV.GetVal(ElN);
        if (CurrStr == "..") {
            // ".." pops the previous segment; going above the root is an error
            EAssertR(!CanonS.Empty(), "Stack empty");
            CanonS.Pop();
        } else if (CurrStr != ".") {
            // ordinary segment ("." components are simply dropped)
            CanonS.Push(CurrStr+"/");
        }
    }
    // Assemble the canonical path (from left to right)
    EAssertR(!CanonS.Empty(), "Stack empty");
    // We start with drive letter (Windows) or slash (Unix)
    TChA CanonFPath = AbsFPath.LeftOf('/'); CanonFPath += '/';
    // Get the rest of the path
    for (int CanonN = CanonS.Len() - 1; CanonN >= 0; CanonN--) {
        CanonFPath += CanonS[CanonN];
    }
    // Done
    return CanonFPath;
}
Example #20
0
void TTable::Select(TPredicate& Predicate){
  // Filters the table in place: evaluates Predicate on every row and
  // physically removes each row for which it returns false.
  // (The unused local 'TIntV Selected' from the original was removed.)
  TStrV RelevantCols;
  Predicate.GetVariables(RelevantCols);
  TInt NumRelevantCols = RelevantCols.Len();
  // cache column types once, so the per-row loop only switches on them
  TVec<TYPE> ColTypes = TVec<TYPE>(NumRelevantCols);
  for(TInt i = 0; i < NumRelevantCols; i++){
    ColTypes[i] = GetColType(RelevantCols[i]);
  } 
  
  TRowIteratorWithRemove RowI = BegRIWR();
  while(RowI.GetNextRowIdx() != Last){
    // bind the next row's relevant column values to the predicate variables
    for(TInt i = 0; i < NumRelevantCols; i++){
      switch(ColTypes[i]){
      case INT:
        Predicate.SetIntVal(RelevantCols[i], RowI.GetNextIntAttr(RelevantCols[i]));
        break;
      case FLT:
        Predicate.SetFltVal(RelevantCols[i], RowI.GetNextFltAttr(RelevantCols[i]));
        break;
      case STR:
        Predicate.SetStrVal(RelevantCols[i], RowI.GetNextStrAttr(RelevantCols[i]));
        break;
      }
    }
    if(!Predicate.Eval()){ 
      // row rejected: remove it
      RowI.RemoveNext();
    } else{
      RowI++;
    }
  }
}
Example #21
0
void TBackupProfile::Restore(const TStr& BackupFolderName, const ERestoringMode& RestoringMode, const bool& ReportP) const
{
    for (int N = 0; N < LogV.Len(); N++) {
        // find the folder that matches the BackupFolderName
        if (LogV[N].GetFolderName() == BackupFolderName) {
            const TVec<TBackupFolderInfo> Folders = GetFolders();
            for (int N = 0; N < Folders.Len(); N++) {
                const TStr TargetFolder = Folders[N].Folder;
                TStrV PartV; TDir::SplitPath(TargetFolder, PartV);
                const TStr LastFolderNamePart = PartV[PartV.Len() - 1];

                // do we want to first remove any existing data in the target folder?
                if (RestoringMode == RemoveExistingFirst)
                    TDir::DelNonEmptyDir(TargetFolder);

                // copy data from backup to the destination folder
                const TStr SourceFolder = Destination + ProfileName + "/" + BackupFolderName + "/" + LastFolderNamePart;
                if (ReportP)
                    TNotify::StdNotify->OnStatusFmt("Copying folder: %s", SourceFolder.CStr());
                if (TDir::Exists(SourceFolder))
                    TDir::CopyDir(SourceFolder, TargetFolder, RestoringMode == OverwriteIfExisting);
                else
                    TNotify::StdNotify->OnStatusFmt("WARNING: Unable to create a restore of the folder %s. The folder does not exist.", SourceFolder.CStr());
            }
        }
    }
}
Example #22
0
File: bowfl.cpp Project: Accio/snap
void TBowFl::SaveCpdToLnDocTxt(const TStr& InCpdFNm, const TStr& OutLnDocFNm){
  // Converts a compact-document (.Cpd) file into line-document text format:
  // one document per line as "<name> !<cat> ... !<cat> <contents>".
  TFOut FOut(OutLnDocFNm); FILE* fOut=FOut.GetFileId();
  PSIn CpDocSIn=TCpDoc::FFirstCpd(InCpdFNm); PCpDoc CpDoc; int Docs=0;
  printf("Saving '%s' to '%s' ...\n", InCpdFNm.CStr(), OutLnDocFNm.CStr());
  while (TCpDoc::FNextCpd(CpDocSIn, CpDoc)){
    Docs++; if (Docs%100==0){printf("%d Docs\r", Docs);}
    // get document-name (normalized; spaces replaced so the name is one token)
    TStr DocNm=CpDoc->GetDocNm();
    DocNm=TStr::GetFNmStr(DocNm);
    DocNm.ChangeChAll(' ', '_');
    // get document-categories
    TStrV CatNmV;
    for (int CatN=0; CatN<CpDoc->GetCats(); CatN++){
      CatNmV.Add(CpDoc->GetCatNm(CatN));}
    // get document-contents (replace CR/LF so the document stays on one line)
    TChA DocChA=CpDoc->GetTxtStr();
    DocChA.ChangeCh('\r', ' ');
    DocChA.ChangeCh('\n', ' ');
    // save document
    fprintf(fOut, "%s", DocNm.CStr());
    for (int CatN=0; CatN<CatNmV.Len(); CatN++){
      fprintf(fOut, " !%s", CatNmV[CatN].CStr());}
    fprintf(fOut, " %s\n", DocChA.CStr());
  }
  printf("%d Docs\nDone.\n", Docs);
}
PAlignPair TAlignPair::LoadAcXml(const TStr& FNm, const int& MxSents) {
    // Loads an aligned-corpus XML file into an alignment pair. The two
    // language codes are taken from the file name ("<base>-<lang1>-<lang2>").
    // When MxSents != -1, at most MxSents randomly sampled sentence pairs
    // are kept (deterministic seed, so sampling is reproducible).
    printf("Loading %s ...\n", FNm.CStr());
    // get the language names from the file name
    TStr BaseNm = FNm.GetFMid();
    TStrV PartV; BaseNm.SplitOnAllCh('-', PartV);
    IAssertR(PartV.Len() == 3, "Bad file name: " + BaseNm);
    // prepare align pair
    PAlignPair AlignPair = TAlignPair::New(PartV[1], PartV[2]);
    // parse the XML; load up to 4x the limit so the sample has material
    // NOTE(review): when MxSents == -1 this passes -4 to LoadAC — presumably
    // a negative limit means "load everything"; confirm in TTransCorpus.
    PTransCorpus TransCorpus = TTransCorpus::LoadAC(FNm, MxSents * 4);
    // select subset of sentences which will go into aligned corpus
    const int AllSents = TransCorpus->GetSentences();
    TIntV SentIdV(AllSents, 0);
    for (int SentId = 0; SentId < AllSents; SentId++) {
        SentIdV.Add(SentId); 
    }
    if (MxSents != -1 && AllSents > MxSents) {
    	// fixed seed => reproducible sample
    	TRnd Rnd(1);
        SentIdV.Shuffle(Rnd);
        SentIdV.Trunc(MxSents);
    }
    // add the sentences to the bow
    const int Sents = SentIdV.Len();
    for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        const TStr& Sent1 = TransCorpus->GetOrgStr(SentId);
        const TStr& Sent2 = TransCorpus->GetRefTransStrV(SentId)[0];
        AlignPair->AddSent(Sent1, Sent2);
    }
    // finish the alignment pair
    AlignPair->Def();
    return AlignPair;
}
void TCordisEuPart::ExtrEuPartV(const TStr& AllEuPartStr, TCordisEuPartV& EuPartV){
  // Extracts EU-project partner records from a scraped HTML fragment: the
  // text is split on "Organisation Type:" headers and each chunk is matched
  // against a wildcard pattern to pull out the individual fields.
  TStrV EuPartStrV; AllEuPartStr.SplitOnStr("Organisation Type:", EuPartStrV);
  // chunk 0 precedes the first header, so start at 1
  for (int EuPartN=1; EuPartN<EuPartStrV.Len(); EuPartN++){
    TStr EuPartStr=EuPartStrV[EuPartN];
    // '*' wildcards capture fields: [0] org type, [2] org name, [3] country blob
    TStr WcStr=
     "</span>*</td>"
     "*Organisation:</span>*<br>"
     "*</td>*";
    TStrV StarStrV;
    if (EuPartStr.IsWcMatch(WcStr, StarStrV)){
      PCordisEuPart EuPart=TCordisEuPart::New();
      EuPart->DeptNm=ExtrDeptNm(EuPartStr);
      EuPart->OrgNm=TXmlLx::GetPlainStrFromXmlStr(StarStrV[2].GetTrunc());
      EuPart->OrgTypeNm=StarStrV[0].GetTrunc();
      EuPart->CountryNm=ExtrCountry(StarStrV[3].GetTrunc());
      // the first listed partner is taken to be the coordinator
      EuPart->CoordP=(EuPartN==1);
      printf("   Partner: '%s'/'%s'/'%s'/'%s'/'%s'\n",
       EuPart->DeptNm.CStr(), EuPart->OrgNm.CStr(),
       EuPart->OrgTypeNm.CStr(), EuPart->CountryNm.CStr(),
       TBool::GetStr(EuPart->CoordP).CStr());
      EuPartV.Add(EuPart);
    } else {
      printf("Partner Fields Not Found!\n");
    }
  }
}
Example #25
0
// Directory is timestamped in the web directory by default.
void LogOutput::SetupNewOutputDirectory(TStr Directory) {
  // Chooses (or creates) the output directory and copies the static text/web
  // resources into it. No-op unless logging is enabled.
  if (!ShouldLog) return;
  // CASE 1: We need to set up a new directory (timestamped under WebDirectory)
  if (Directory == TStr("")) {
    TSecTm Tm = TSecTm::GetCurTm();
    TStr TimeStamp = Tm.GetDtYmdStr() + "_" + Tm.GetTmStr();
    this->Directory = WebDirectory + TimeStamp;
    Err("New directory set up: %s\n", this->Directory.CStr());
  } else {
    // CASE 2: The user has specified a directory - we just have to store it
    this->Directory = Directory;
    Err("Using existing directory: %s\n", this->Directory.CStr());
  }

  // Copy directories over.
  // NOTE(review): commands are built by concatenation and run via system();
  // a directory containing spaces or shell metacharacters would break or be
  // unsafe — confirm Directory is always a controlled, trusted value.
  TStrV Commands;
  Commands.Add("mkdir -p " + this->Directory);
  Commands.Add("cp -r resources/output/text " + this->Directory);
  Commands.Add("cp -r resources/output/web " + this->Directory);

  for (int i = 0; i < Commands.Len(); i++) {
    system(Commands[i].CStr());
  }
  Err("Necessary files copied over to %s\n", this->Directory.CStr());
}
Example #26
0
PNGramBs TNGramBs::GetNGramBsFromHtmlStrV(
 const TStrV& HtmlStrV,
 const int& MxNGramLen, const int& MnNGramFq,
 const PSwSet& SwSet, const PStemmer& Stemmer){
  // Builds an n-gram base (max n-gram length MxNGramLen, min frequency
  // MnNGramFq) by making repeated passes over the document set until the
  // base reports it is finished.
  PNGramBs NGramBs = TNGramBs::New(MxNGramLen, MnNGramFq, SwSet, Stemmer);
  while (!NGramBs->IsFinished()){
    // one pass: extract words from every document & update the n-gram base
    for (int HtmlStrN = 0; HtmlStrN < HtmlStrV.Len(); HtmlStrN++){
      _UpdateNGramBsFromHtmlStr(NGramBs, HtmlStrV[HtmlStrN], SwSet, Stemmer);
    }
    // conclude the pass before deciding whether another one is needed
    NGramBs->ConcPass();
  }
  return NGramBs;
}
Example #27
0
void TWebPgFetchEvent::OnFetchEnd(const PHttpResp& HttpResp){
  // Handles a completed HTTP response: 2XX delivers the page to the fetcher,
  // 3XX follows the redirect (bounded by the redirect-chain length), and any
  // other status is reported as a fetch error.
  EAssert(HttpResp->IsOk());
  EndMSecs=TTm::GetCurUniMSecs();
  int StatusCd=HttpResp->GetStatusCd();
  if (StatusCd/100==2){ // codes 2XX - ok
    ChangeLastUrlToLc(HttpResp);
    PWebPg WebPg=TWebPg::New(UrlStrV, IpNumV, HttpResp);
    WebPg->PutFetchMSecs(GetFetchMSecs());
    // release the connection before notifying the owner
    CloseConn(); Fetch->DisconnUrl(FId);
    Fetch->OnFetch(FId, WebPg);
  } else
  if (StatusCd/100==3){ // codes 3XX - redirection
    ChangeLastUrlToLc(HttpResp);
    // UrlStrV holds the redirect chain; refuse chains of 5 or more URLs
    if (UrlStrV.Len()<5){
      TStr RedirUrlStr=HttpResp->GetFldVal("Location");
      PUrl RedirUrl=TUrl::New(RedirUrlStr, CurUrl->GetUrlStr());
      if (RedirUrl->IsOk(usHttp)){
        // carry the response cookies over to the redirected request
        TStrQuV CookieQuV; HttpResp->GetCookieKeyValDmPathQuV(CookieQuV);
        AddCookieKeyValDmPathQuV(CookieQuV);
        // restart the fetch against the redirect target
        CurUrl=RedirUrl; Retries=0; CloseConn();
        TSockHost::GetAsyncSockHost(CurUrl->GetIpNumOrHostNm(), this);
      } else {
        TStr MsgStr=TStr("Invalid Redirection URL (")+RedirUrlStr+")";
        OnFetchError(MsgStr);
      }
    } else {
      TStr MsgStr=TStr("Cycling Redirection [")+TopUrl->GetUrlStr()+"]";
      OnFetchError(MsgStr);
    }
  } else { // all other codes - error
    TStr MsgStr=TStr("Http Error (")+
     TInt::GetStr(StatusCd)+"/"+HttpResp->GetReasonPhrase()+")";
    OnFetchError(MsgStr);
  }
}
int TTransCorpus::CountWords(const TIntStrVH& StrVH, const bool& AllSents) {
    int Words = 0, KeyId = StrVH.FFirstKeyId();
    while (StrVH.FNextKeyId(KeyId)) {
        const TStrV& StrV = StrVH[KeyId];
        if (AllSents) { 
            for (int StrN = 0; StrN < StrV.Len(); StrN++) {                
                TStrV WordV; StrV[StrN].SplitOnWs(WordV);
                Words += WordV.Len();
            }
        } else {
            TStrV WordV; StrV[0].SplitOnWs(WordV);
            Words += WordV.Len();
        }
    }
    return Words;
}
Example #29
0
void ReadGraph(TStr& InFile, bool& Directed, bool& Weighted, bool& Verbose, PWNet& InNet) {
  // Reads an edge list into InNet. Each line is "Src Dst [Weight]"; anything
  // after '#' is a comment. Undirected graphs get the reverse edge as well.
  // Malformed lines are skipped; parse failures abort the loop via exception.
  TFIn FIn(InFile);
  int64 LineCnt = 0;
  try {
    while (!FIn.Eof()) {
      TStr Ln;
      FIn.GetNextLn(Ln);
      TStr Line, Comment;
      // strip a trailing '#' comment
      Ln.SplitOnCh(Line,'#',Comment);
      TStrV Tokens;
      Line.SplitOnWs(Tokens);
      // BUG FIX: a weighted line needs 3 tokens; the original accessed
      // Tokens[2] out of bounds on weighted lines with only 2 tokens
      if (Tokens.Len() < (Weighted ? 3 : 2)) { continue; }
      int64 SrcNId = Tokens[0].GetInt();
      int64 DstNId = Tokens[1].GetInt();
      double Weight = 1.0;
      if (Weighted) { Weight = Tokens[2].GetFlt(); }
      if (!InNet->IsNode(SrcNId)){ InNet->AddNode(SrcNId); }
      if (!InNet->IsNode(DstNId)){ InNet->AddNode(DstNId); }
      InNet->AddEdge(SrcNId,DstNId,Weight);
      // undirected graphs store both directions explicitly
      if (!Directed){ InNet->AddEdge(DstNId,SrcNId,Weight); }
      LineCnt++;
    }
    if (Verbose) { printf("Read %lld lines from %s\n", (long long)LineCnt, InFile.CStr()); }
  } catch (PExcept Except) {
    if (Verbose) {
      printf("Read %lld lines from %s, then %s\n", (long long)LineCnt, InFile.CStr(),
       Except->GetStr().CStr());
    }
  }
}
Example #30
0
PLwOnto TLwOnto::LoadVoc(const TStr& FNm) {
  // Loads a flat vocabulary file (one term per line) into a fresh ontology
  // with a single English language entry and a single "Term" term-type.
  PLwOnto LwOnto = TLwOnto::New();
  // register the language
  const int EnLangId = LwOnto->GetLangBs()->AddLang("EN", "English");
  // register the one term-type shared by all terms
  PLwTermType TermType = TLwTermType::New(0, "Term", EnLangId);
  LwOnto->GetTermTypeBs()->AddTermType(TermType);
  // load & split the vocabulary file
  {printf("Creating terms ...\n");
  TStr VocFileStr = TStr::LoadTxt(FNm);
  VocFileStr.DelChAll('\r'); // normalize line endings
  TStrV TermNmV; VocFileStr.SplitOnAllCh('\n', TermNmV);
  const int Terms = TermNmV.Len();
  for (int TermId = 0; TermId < Terms; TermId++){
    // progress report every 1000 terms
    if (TermId % 1000 == 0){printf("%d/%d\r", TermId, Terms);}
    TStr TermNm = TermNmV[TermId];
    // the line number doubles as the term id
    PLwTerm Term = TLwTerm::New(TermId, TermNm, EnLangId, TermType->GetTermTypeId());
    LwOnto->GetTermBs()->AddTerm(Term);
  }
  printf("Done. (%d)\n", LwOnto->GetTermBs()->GetTerms());}
  // return ontology
  return LwOnto;
}